feat: add format spec lexer

This commit is contained in:
2026-02-07 17:56:59 +01:00
parent 631a62d878
commit 9af843e802
6 changed files with 200 additions and 1 deletions

View File

@@ -0,0 +1,9 @@
let a = 42
print(f"int: {a:d}; hex: {a:h}; HEX: {a:H}; oct: {a:o}; bin: {a:b}")
let b = 1234567890
print(f"{b:,}")
print(f"{b:_}")
let pts = 19
let total = 22
print(f"Correct answers: {pts/total:.2%}")

View File

View File

@@ -0,0 +1,129 @@
from typing import Optional, Any, Callable
from src.core.position import Position
from src.core.format_spec.token import Token, TokenType
class FormatSpecLexer:
def __init__(self, source: str, path: Optional[str] = None):
self.path: str = path or "<main>"
self.source: str = source
self.tokens: list[Token] = []
self.start: int = 0
self.idx: int = 0
self.length: int = len(self.source)
self.line: int = 1
self.column: int = 1
self.start_pos: Position = self.get_position()
def error(self, msg: str):
raise SyntaxError(f"[ERROR] Format spect error at {self.start_pos}: {msg}")
def process(self) -> list[Token]:
self.scan_tokens()
self.tokens.append(Token(TokenType.EOF, "", None, self.get_position()))
return self.tokens
def is_at_end(self) -> bool:
return self.idx >= self.length
def get_position(self) -> Position:
return Position(self.path, self.line, self.column)
def peek(self) -> str:
if self.idx < self.length:
return self.source[self.idx]
return ""
def peek_next(self) -> str:
if self.idx + 1 < self.length:
return self.source[self.idx + 1]
return ""
def advance(self) -> str:
char: str = self.peek()
self.idx += 1
self.column += 1
if char == "\n":
self.newline()
return char
def newline(self):
self.line += 1
self.column = 1
def update_start(self):
self.start_pos = self.get_position()
self.start = self.idx
def add_token(self, token_type: TokenType, value: Optional[Any] = None):
lexeme: str = self.source[self.start:self.idx]
self.tokens.append(
Token(
position=self.start_pos,
type=token_type,
lexeme=lexeme,
value=value
)
)
def scan_tokens(self, condition: Optional[Callable[[], bool]] = None):
if condition is None:
condition = lambda: True
while condition() and not self.is_at_end():
self.update_start()
self.scan_token()
def scan_token(self):
char: str = self.advance()
match char:
case "+":
self.add_token(TokenType.PLUS)
case "-":
self.add_token(TokenType.MINUS)
case " ":
self.add_token(TokenType.SPACE)
case ",":
self.add_token(TokenType.COMMA)
case "_":
self.add_token(TokenType.UNDERSCORE)
case "s":
self.add_token(TokenType.T_STR)
case "b":
self.add_token(TokenType.T_BIN)
case "d":
self.add_token(TokenType.T_DEC)
case "o":
self.add_token(TokenType.T_OCT)
case "h" | "H":
self.add_token(TokenType.T_HEX)
case "e":
self.add_token(TokenType.T_SCI)
case "f":
self.add_token(TokenType.T_FIX)
case "%":
self.add_token(TokenType.T_PCT)
case "." if self.peek().isdigit():
self.scan_number(True)
case _:
if char.isdigit():
self.scan_number()
else:
self.error("Unexpected character")
return None
def scan_number(self, decimal_only: bool = False):
while self.peek().isdigit():
self.advance()
if not decimal_only:
if self.peek() == "." and self.peek_next().isdigit():
self.advance()
while self.peek().isdigit():
self.advance()
value_str: str = self.source[self.start:self.idx]
if decimal_only:
value_str = f"0{value_str}"
value: float = float(value_str)
self.add_token(TokenType.NUMBER, value)

View File

@@ -0,0 +1,50 @@
from dataclasses import dataclass
from enum import Enum, auto
from typing import Any
from src.core.position import Position
class TokenType(Enum):
    """Token categories produced by the format-spec lexer.

    Member order is meaningful only insofar as `auto()` assigns sequential
    values; comparisons are by identity, never by numeric value.
    """

    # Sign options: '+', '-', or ' ' before a numeric field.
    PLUS = auto()
    MINUS = auto()
    SPACE = auto()
    # Digit-grouping separators: ',' or '_'.
    COMMA = auto()
    UNDERSCORE = auto()
    # Presentation types.
    ## Str
    T_STR = auto()
    ## Int: binary, decimal, octal, hex (both 'h' and 'H' map here).
    T_BIN = auto()
    T_DEC = auto()
    T_OCT = auto()
    T_HEX = auto()
    ## Float: scientific, fixed-point, percentage.
    T_SCI = auto()
    T_FIX = auto()
    T_PCT = auto()
    # Misc: numeric literals (widths/precisions) and end-of-input marker.
    NUMBER = auto()
    EOF = auto()
@dataclass(frozen=True)
class Token:
    """A single immutable lexeme produced by the format-spec lexer.

    `value` carries the parsed payload (e.g. the float behind a NUMBER
    token) and is None for tokens that have no payload.
    """

    type: TokenType
    lexeme: str
    value: Any
    position: Position

    def __repr__(self) -> str:
        """Render as `[NAME]` or `[NAME (value)]` when a payload exists."""
        payload = "" if self.value is None else f" ({self.value!r})"
        return f"[{self.type.name}{payload}]"

View File

@@ -1,5 +1,6 @@
from typing import Optional, Any, Callable from typing import Optional, Any, Callable
from src.core.format_spec.lexer import FormatSpecLexer
from src.token.keyword import KEYWORDS from src.token.keyword import KEYWORDS
from src.core.position import Position from src.core.position import Position
from src.token.token import Token, TokenType from src.token.token import Token, TokenType
@@ -172,10 +173,19 @@ class Lexer:
self.update_start() self.update_start()
def scan_fstring_embed(self): def scan_fstring_embed(self):
self.scan_tokens(lambda: self.peek() != "}") self.scan_tokens(lambda: self.peek() != "}" and self.peek() != ":")
if self.is_at_end(): if self.is_at_end():
self.error("Unterminated f-string embed") self.error("Unterminated f-string embed")
self.update_start() self.update_start()
if self.match(":"):
self.update_start()
while self.peek() != "}":
self.advance()
format_spec_str: str = self.source[self.start:self.idx]
format_spec: list = FormatSpecLexer(format_spec_str, self.path).process()
self.add_token(TokenType.FORMAT_SPEC, format_spec)
self.update_start()
self.advance() self.advance()
self.add_token(TokenType.RIGHT_BRACE) self.add_token(TokenType.RIGHT_BRACE)
self.update_start() self.update_start()

View File

@@ -70,6 +70,7 @@ class TokenType(Enum):
WHITESPACE = auto() WHITESPACE = auto()
EOF = auto() EOF = auto()
NEWLINE = auto() NEWLINE = auto()
FORMAT_SPEC = auto()
@dataclass(frozen=True) @dataclass(frozen=True)