feat: add format spec lexer

2026-02-07 17:56:59 +01:00
parent 631a62d878
commit 9af843e802
6 changed files with 200 additions and 1 deletions
--- a/examples/basic/23_format_spec.peb
+++ b/examples/basic/23_format_spec.peb
@@ -0,0 +1,9 @@
 let a = 42
 print(f"int: {a:d}; hex: {a:h}; HEX: {a:H}; oct: {a:o}; bin: {a:b}")
 let b = 1234567890
 print(f"{b:,}")
 print(f"{b:_}")
 let points = 19
 let total = 22
 print(f"Correct answers: {points/total:.2%}")
--- a/src/core/format_spec/__init__.py
+++ b/src/core/format_spec/__init__.py
--- a/src/core/format_spec/lexer.py
+++ b/src/core/format_spec/lexer.py
@@ -0,0 +1,129 @@
 from typing import Optional, Any, Callable
 from src.core.position import Position
 from src.core.format_spec.token import Token, TokenType
 class FormatSpecLexer:
    def __init__(self, source: str, path: Optional[str] = None):
        self.path: str = path or "<main>"
        self.source: str = source
        self.tokens: list[Token] = []
        self.start: int = 0
        self.idx: int = 0
        self.length: int = len(self.source)
        self.line: int = 1
        self.column: int = 1
        self.start_pos: Position = self.get_position()
    def error(self, msg: str):
        raise SyntaxError(f"[ERROR] Format spect error at {self.start_pos}: {msg}")
    def process(self) -> list[Token]:
        self.scan_tokens()
        self.tokens.append(Token(TokenType.EOF, "", None, self.get_position()))
        return self.tokens
    def is_at_end(self) -> bool:
        return self.idx >= self.length
    def get_position(self) -> Position:
        return Position(self.path, self.line, self.column)
    def peek(self) -> str:
        if self.idx < self.length:
            return self.source[self.idx]
        return ""
    def peek_next(self) -> str:
        if self.idx + 1 < self.length:
            return self.source[self.idx + 1]
        return ""
    def advance(self) -> str:
        char: str = self.peek()
        self.idx += 1
        self.column += 1
        if char == "\n":
            self.newline()
        return char
    def newline(self):
        self.line += 1
        self.column = 1
    def update_start(self):
        self.start_pos = self.get_position()
        self.start = self.idx
    def add_token(self, token_type: TokenType, value: Optional[Any] = None):
        lexeme: str = self.source[self.start:self.idx]
        self.tokens.append(
            Token(
                position=self.start_pos,
                type=token_type,
                lexeme=lexeme,
                value=value
            )
        )
    def scan_tokens(self, condition: Optional[Callable[[], bool]] = None):
        if condition is None:
            condition = lambda: True
        while condition() and not self.is_at_end():
            self.update_start()
            self.scan_token()
    def scan_token(self):
        char: str = self.advance()
        match char:
            case "+":
                self.add_token(TokenType.PLUS)
            case "-":
                self.add_token(TokenType.MINUS)
            case " ":
                self.add_token(TokenType.SPACE)
            case ",":
                self.add_token(TokenType.COMMA)
            case "_":
                self.add_token(TokenType.UNDERSCORE)
            case "s":
                self.add_token(TokenType.T_STR)
            case "b":
                self.add_token(TokenType.T_BIN)
            case "d":
                self.add_token(TokenType.T_DEC)
            case "o":
                self.add_token(TokenType.T_OCT)
            case "h" | "H":
                self.add_token(TokenType.T_HEX)
            case "e":
                self.add_token(TokenType.T_SCI)
            case "f":
                self.add_token(TokenType.T_FIX)
            case "%":
                self.add_token(TokenType.T_PCT)
            case "." if self.peek().isdigit():
                self.scan_number(True)
            case _:
                if char.isdigit():
                    self.scan_number()
                else:
                    self.error("Unexpected character")
        return None
    def scan_number(self, decimal_only: bool = False):
        while self.peek().isdigit():
            self.advance()
        if not decimal_only:
            if self.peek() == "." and self.peek_next().isdigit():
                self.advance()
                while self.peek().isdigit():
                    self.advance()
        value_str: str = self.source[self.start:self.idx]
        if decimal_only:
            value_str = f"0{value_str}"
        value: float = float(value_str)
        self.add_token(TokenType.NUMBER, value)
--- a/src/core/format_spec/token.py
+++ b/src/core/format_spec/token.py
@@ -0,0 +1,50 @@
 from dataclasses import dataclass
 from enum import Enum, auto
 from typing import Any
 from src.core.position import Position
 class TokenType(Enum):
    """Kinds of token produced when lexing a format spec.

    Values are assigned by ``auto()``, so they follow declaration order.
    """
    # Sign
    PLUS = auto()  # "+"
    MINUS = auto()  # "-"
    SPACE = auto()  # " "
    # Grouping
    COMMA = auto()  # ","
    UNDERSCORE = auto()  # "_"
    # Type
    ## Str
    T_STR = auto()  # "s"
    ## Int
    T_BIN = auto()  # "b"
    T_DEC = auto()  # "d"
    T_OCT = auto()  # "o"
    T_HEX = auto()  # "h" or "H"
    ## Float
    T_SCI = auto()  # "e"
    T_FIX = auto()  # "f"
    T_PCT = auto()  # "%"
    # Misc
    NUMBER = auto()  # numeric literal; the lexer stores its float value
    EOF = auto()  # appended once after the last token
@dataclass(frozen=True)
class Token:
    """Immutable format-spec token.

    Field order matters: the lexer also builds tokens positionally as
    ``Token(type, lexeme, value, position)``.
    """
    type: TokenType
    lexeme: str
    value: Any
    position: Position

    def __repr__(self) -> str:
        """Render as ``[NAME]``, or ``[NAME (value)]`` when a value is attached."""
        kind_name = self.type.name
        if self.value is None:
            return f"[{kind_name}]"
        return f"[{kind_name} ({self.value!r})]"
--- a/src/token/lexer.py
+++ b/src/token/lexer.py
@@ -1,5 +1,6 @@
 from typing import Optional, Any, Callable
 from src.core.format_spec.lexer import FormatSpecLexer
 from src.token.keyword import KEYWORDS
 from src.core.position import Position
 from src.token.token import Token, TokenType
@@ -172,10 +173,19 @@ class Lexer:
        self.update_start()
    def scan_fstring_embed(self):
        """Scan the inside of an f-string ``{...}`` embed, with an optional format spec.

        Tokenizes the embedded expression up to ``}`` or ``:``. When a ``:``
        follows, the text up to the closing ``}`` is lexed by
        ``FormatSpecLexer`` and emitted as a single ``FORMAT_SPEC`` token whose
        value is the resulting token list. Finally the closing brace is
        emitted as ``RIGHT_BRACE``.

        NOTE(review): the first ``:`` always starts the format spec, so an
        expression that itself contains ``:`` is cut short there.
        NOTE(review): positions inside the format-spec tokens are relative to
        the spec text (they start at line 1, column 1), not to this file.
        """
        self.scan_tokens(lambda: self.peek() != "}" and self.peek() != ":")
        if self.is_at_end():
            self.error("Unterminated f-string embed")
        self.update_start()
        if self.match(":"):
            self.update_start()
            # Also stop at end of input: without this check a missing "}" is
            # never reported and the loop keeps advancing past the source.
            while not self.is_at_end() and self.peek() != "}":
                self.advance()
            if self.is_at_end():
                self.error("Unterminated f-string embed")
            format_spec_str: str = self.source[self.start:self.idx]
            format_spec: list = FormatSpecLexer(format_spec_str, self.path).process()
            self.add_token(TokenType.FORMAT_SPEC, format_spec)
            self.update_start()
        self.advance()
        self.add_token(TokenType.RIGHT_BRACE)
        self.update_start()
--- a/src/token/token.py
+++ b/src/token/token.py
@@ -70,6 +70,7 @@ class TokenType(Enum):
    WHITESPACE = auto()
    EOF = auto()
    NEWLINE = auto()
    FORMAT_SPEC = auto()
@dataclass(frozen=True)