feat(lexer): add f-string tokens and scanning

This commit is contained in:
2026-02-07 15:15:54 +01:00
parent 63c8e3ab12
commit d88e640273
3 changed files with 85 additions and 10 deletions

View File

@@ -0,0 +1,29 @@
let def = "DEF"
let s = f"abc {def} {"ghi"}"
print(s)
/*
class Person {
init(name) {
this.name = name
}
greet(person) {
if person == null {
print(f"Hello, I'm {this.name}")
} else {
print(f"Hello {person.name}, I'm {this.name}")
}
}
}
fun meet(person1, person2) {
person1.greet(null)
person2.greet(person1)
}
let alice = Person("Alice")
let bob = Person("Bob")
meet(alice, bob)
*/

View File

@@ -1,4 +1,4 @@
from typing import Optional, Any
from typing import Optional, Any, Callable
from src.token.keyword import KEYWORDS
from src.token.position import Position
@@ -21,12 +21,13 @@ class Lexer:
raise SyntaxError(f"[ERROR] Error at {self.start_pos}: {msg}")
def process(self) -> list[Token]:
while not self.is_at_end():
self.start_pos = self.get_position()
self.start = self.idx
self.scan_token()
self.tokens.append(Token(TokenType.EOF, "", None, self.get_position()))
try:
self.scan_tokens()
self.tokens.append(Token(TokenType.EOF, "", None, self.get_position()))
except Exception as e:
print("Partially parsed tokens:")
print(self.tokens)
raise e
return self.tokens
def is_at_end(self) -> bool:
@@ -63,6 +64,10 @@ class Lexer:
return True
return False
def update_start(self):
    """Reset the start-of-lexeme markers to the current scan position.

    Called before scanning each token so that `add_token` can slice the
    lexeme as `source[start:idx]` and stamp it with `start_pos`.
    """
    self.start = self.idx
    self.start_pos = self.get_position()
def add_token(self, token_type: TokenType, value: Optional[Any] = None):
lexeme: str = self.source[self.start:self.idx]
self.tokens.append(
@@ -74,6 +79,13 @@ class Lexer:
)
)
def scan_tokens(self, condition: Optional[Callable[[], bool]] = None):
    """Scan tokens until end of input, or until *condition* turns false.

    When *condition* is None the whole remaining input is scanned; the
    f-string machinery passes a predicate to stop at a closing brace.
    """
    keep_going = (lambda: True) if condition is None else condition
    # Predicate is checked first, matching the original short-circuit order.
    while keep_going() and not self.is_at_end():
        self.update_start()
        self.scan_token()
def scan_token(self):
char: str = self.advance()
match char:
@@ -120,10 +132,12 @@ class Lexer:
while self.peek().isspace() and self.peek() != "\n" and not self.is_at_end():
self.advance()
self.add_token(TokenType.WHITESPACE)
case '"':
self.scan_string()
case _:
if char.isdigit():
if char == "f" and self.match('"'):
self.scan_fstring()
elif char == '"':
self.scan_string()
elif char.isdigit():
self.scan_number()
elif char.isalpha():
self.scan_identifier()
@@ -142,6 +156,35 @@ class Lexer:
value: str = self.source[self.start + 1:self.idx - 1]
self.add_token(TokenType.STRING, value)
def scan_fstring(self):
    """Scan an f-string body: f" TEXT { EXPR } TEXT ... ".

    Emits FSTRING_START, then alternating FSTRING_TEXT / embedded-expression
    token runs (delegated to scan_fstring_embed), then FSTRING_END.
    The opening `f"` has already been consumed by the caller.

    Raises (via self.error) on an unterminated f-string.
    """
    self.add_token(TokenType.FSTRING_START)
    self.update_start()
    while self.peek() != '"' and not self.is_at_end():
        # A lone '{' opens an embedded expression; '{{' falls through to
        # the text branch (both braces stay in the FSTRING_TEXT slice).
        if self.peek() == "{" and self.peek_next() != "{":
            # Flush literal text gathered so far before the embed.
            self.add_fstring_text()
            self.advance()  # consume '{'
            self.add_token(TokenType.LEFT_BRACE)
            # scan_fstring_embed resets the start markers on return,
            # so text accumulation resumes cleanly after '}'.
            self.scan_fstring_embed()
        else:
            self.advance()
    # Fix: previously an unterminated f-string fell through silently,
    # emitting FSTRING_END and advancing past end-of-input. Mirror the
    # error handling used in scan_fstring_embed instead.
    if self.is_at_end():
        self.error("Unterminated f-string")
    self.add_fstring_text()
    self.advance()  # consume closing '"'
    self.add_token(TokenType.FSTRING_END)
def add_fstring_text(self):
    """Emit the literal text accumulated since the last start marker.

    Produces a FSTRING_TEXT token for `source[start:idx]` (possibly the
    empty string) and resets the start markers for the next run.
    """
    text: str = self.source[self.start:self.idx]
    self.add_token(TokenType.FSTRING_TEXT, text)
    self.update_start()
def scan_fstring_embed(self):
    """Tokenize the expression inside an f-string's {...} embed.

    Scans ordinary tokens up to the closing brace, emits RIGHT_BRACE for
    it, and leaves the start markers positioned just past it.
    """
    def before_closing_brace() -> bool:
        return self.peek() != "}"

    self.scan_tokens(before_closing_brace)
    if self.is_at_end():
        self.error("Unterminated f-string embed")
    self.update_start()
    self.advance()  # consume '}'
    self.add_token(TokenType.RIGHT_BRACE)
    self.update_start()
def scan_number(self):
while self.peek().isdigit():
self.advance()

View File

@@ -37,6 +37,9 @@ class TokenType(Enum):
# Literals
IDENTIFIER = auto()
STRING = auto()
FSTRING_START = auto()
FSTRING_END = auto()
FSTRING_TEXT = auto()
NUMBER = auto()
TRUE = auto()
FALSE = auto()