feat(lexer): add f-string tokens and scanning

This commit is contained in:
2026-02-07 15:15:54 +01:00
parent 63c8e3ab12
commit d88e640273
3 changed files with 85 additions and 10 deletions

View File

@@ -0,0 +1,29 @@
let def = "DEF"
let s = f"abc {def} {"ghi"}"
print(s)
/*
class Person {
init(name) {
this.name = name
}
greet(person) {
if person == null {
print(f"Hello, I'm {this.name}")
} else {
print(f"Hello {person.name}, I'm {this.name}")
}
}
}
fun meet(person1, person2) {
person1.greet(null)
person2.greet(person1)
}
let alice = Person("Alice")
let bob = Person("Bob")
meet(alice, bob)
*/

View File

@@ -1,4 +1,4 @@
from typing import Optional, Any
from typing import Optional, Any, Callable
from src.token.keyword import KEYWORDS
from src.token.position import Position
@@ -21,12 +21,13 @@ class Lexer:
raise SyntaxError(f"[ERROR] Error at {self.start_pos}: {msg}")
def process(self) -> list[Token]:
while not self.is_at_end():
self.start_pos = self.get_position()
self.start = self.idx
self.scan_token()
self.tokens.append(Token(TokenType.EOF, "", None, self.get_position()))
try:
self.scan_tokens()
self.tokens.append(Token(TokenType.EOF, "", None, self.get_position()))
except Exception as e:
print("Partially parsed tokens:")
print(self.tokens)
raise e
return self.tokens
def is_at_end(self) -> bool:
@@ -63,6 +64,10 @@ class Lexer:
return True
return False
def update_start(self):
    """Reset the start-of-lexeme markers to the current scan position.

    Called before scanning each token so that `add_token` can slice the
    lexeme as `source[start:idx]` and stamp it with `start_pos`.
    """
    self.start = self.idx
    self.start_pos = self.get_position()
def add_token(self, token_type: TokenType, value: Optional[Any] = None):
lexeme: str = self.source[self.start:self.idx]
self.tokens.append(
@@ -74,6 +79,13 @@ class Lexer:
)
)
def scan_tokens(self, condition: Optional[Callable[[], bool]] = None):
    """Scan tokens until end of input, or until *condition* turns false.

    When *condition* is None the whole remaining input is scanned; the
    f-string machinery passes a predicate to stop at a closing brace.
    """
    keep_going = (lambda: True) if condition is None else condition
    # Predicate is checked first, matching the original short-circuit order.
    while keep_going() and not self.is_at_end():
        self.update_start()
        self.scan_token()
def scan_token(self):
char: str = self.advance()
match char:
@@ -120,10 +132,12 @@ class Lexer:
while self.peek().isspace() and self.peek() != "\n" and not self.is_at_end():
self.advance()
self.add_token(TokenType.WHITESPACE)
case '"':
self.scan_string()
case _:
if char.isdigit():
if char == "f" and self.match('"'):
self.scan_fstring()
elif char == '"':
self.scan_string()
elif char.isdigit():
self.scan_number()
elif char.isalpha():
self.scan_identifier()
@@ -142,6 +156,35 @@ class Lexer:
value: str = self.source[self.start + 1:self.idx - 1]
self.add_token(TokenType.STRING, value)
def scan_fstring(self):
    """Scan an f-string body: f" TEXT { EXPR } TEXT ... ".

    Emits FSTRING_START, then alternating FSTRING_TEXT / embedded-expression
    token runs (delegated to scan_fstring_embed), then FSTRING_END.
    The opening `f"` has already been consumed by the caller.

    Raises (via self.error) on an unterminated f-string.
    """
    self.add_token(TokenType.FSTRING_START)
    self.update_start()
    while self.peek() != '"' and not self.is_at_end():
        # A lone '{' opens an embedded expression; '{{' falls through to
        # the text branch (both braces stay in the FSTRING_TEXT slice).
        if self.peek() == "{" and self.peek_next() != "{":
            # Flush literal text gathered so far before the embed.
            self.add_fstring_text()
            self.advance()  # consume '{'
            self.add_token(TokenType.LEFT_BRACE)
            # scan_fstring_embed resets the start markers on return,
            # so text accumulation resumes cleanly after '}'.
            self.scan_fstring_embed()
        else:
            self.advance()
    # Fix: previously an unterminated f-string fell through silently,
    # emitting FSTRING_END and advancing past end-of-input. Mirror the
    # error handling used in scan_fstring_embed instead.
    if self.is_at_end():
        self.error("Unterminated f-string")
    self.add_fstring_text()
    self.advance()  # consume closing '"'
    self.add_token(TokenType.FSTRING_END)
def add_fstring_text(self):
    """Emit the literal text accumulated since the last start marker.

    Produces a FSTRING_TEXT token for `source[start:idx]` (possibly the
    empty string) and resets the start markers for the next run.
    """
    text: str = self.source[self.start:self.idx]
    self.add_token(TokenType.FSTRING_TEXT, text)
    self.update_start()
def scan_fstring_embed(self):
    """Tokenize the expression inside an f-string's {...} embed.

    Scans ordinary tokens up to the closing brace, emits RIGHT_BRACE for
    it, and leaves the start markers positioned just past it.
    """
    def before_closing_brace() -> bool:
        return self.peek() != "}"

    self.scan_tokens(before_closing_brace)
    if self.is_at_end():
        self.error("Unterminated f-string embed")
    self.update_start()
    self.advance()  # consume '}'
    self.add_token(TokenType.RIGHT_BRACE)
    self.update_start()
def scan_number(self):
while self.peek().isdigit():
self.advance()

View File

@@ -37,6 +37,9 @@ class TokenType(Enum):
# Literals
IDENTIFIER = auto()
STRING = auto()
FSTRING_START = auto()
FSTRING_END = auto()
FSTRING_TEXT = auto()
NUMBER = auto()
TRUE = auto()
FALSE = auto()