From 278451888728fbcf43f1cfa214902d553e53209e Mon Sep 17 00:00:00 2001 From: LordBaryhobal Date: Sat, 7 Feb 2026 19:14:01 +0100 Subject: [PATCH] feat: add format spec parser --- examples/basic/23_format_spec.peb | 2 +- src/ast/expr.py | 4 +- src/core/format_spec/lexer.py | 17 +--- src/core/format_spec/parser.py | 119 +++++++++++++++++++++++ src/core/format_spec/spec.py | 34 +++++++ src/core/format_spec/string_formatter.py | 30 ++++++ src/core/format_spec/token.py | 1 + src/interpreter/interpreter.py | 18 ++-- src/parser/parser.py | 9 +- 9 files changed, 205 insertions(+), 29 deletions(-) create mode 100644 src/core/format_spec/parser.py create mode 100644 src/core/format_spec/spec.py create mode 100644 src/core/format_spec/string_formatter.py diff --git a/examples/basic/23_format_spec.peb b/examples/basic/23_format_spec.peb index 92f46d5..8780593 100644 --- a/examples/basic/23_format_spec.peb +++ b/examples/basic/23_format_spec.peb @@ -6,4 +6,4 @@ print(f"{b:_}") let pts = 19 let total = 22 -print(f"Correct answers: {points/total:.2%}") \ No newline at end of file +print(f"Correct answers: {pts/total:.2%}") \ No newline at end of file diff --git a/src/ast/expr.py b/src/ast/expr.py index c27dcb8..e01dddd 100644 --- a/src/ast/expr.py +++ b/src/ast/expr.py @@ -2,8 +2,9 @@ from __future__ import annotations from abc import ABC, abstractmethod from dataclasses import dataclass -from typing import Any, TypeVar, Generic +from typing import Any, TypeVar, Generic, Optional +from src.core.format_spec.spec import FormatSpec from src.token.token import Token @@ -151,6 +152,7 @@ class FStringExpr(Expr): class FStringEmbedExpr(Expr): start: Token expression: Expr + spec: Optional[FormatSpec] end: Token def accept(self, visitor: Expr.Visitor[T]) -> T: diff --git a/src/core/format_spec/lexer.py b/src/core/format_spec/lexer.py index e7b697a..578aa51 100644 --- a/src/core/format_spec/lexer.py +++ b/src/core/format_spec/lexer.py @@ -103,8 +103,8 @@ class FormatSpecLexer: self.add_token(TokenType.T_FIX) case "%": self.add_token(TokenType.T_PCT) - case "." if self.peek().isdigit(): - self.scan_number(True) + case ".": + self.add_token(TokenType.DOT) case _: if char.isdigit(): self.scan_number() @@ -112,18 +112,9 @@ class FormatSpecLexer: self.error("Unexpected character") return None - def scan_number(self, decimal_only: bool = False): + def scan_number(self): while self.peek().isdigit(): self.advance() - if not decimal_only: - if self.peek() == "." and self.peek_next().isdigit(): - self.advance() - while self.peek().isdigit(): - self.advance() - - value_str: str = self.source[self.start:self.idx] - if decimal_only: - value_str = f"0{value_str}" - value: float = float(value_str) + value: float = float(self.source[self.start:self.idx]) self.add_token(TokenType.NUMBER, value) diff --git a/src/core/format_spec/parser.py b/src/core/format_spec/parser.py new file mode 100644 index 0000000..7ad7eba --- /dev/null +++ b/src/core/format_spec/parser.py @@ -0,0 +1,119 @@ +from typing import Optional + +from src.core.format_spec.spec import FormatSpec, FormatSpecOptions, FormatSpecNumber, FormatSpecIntegral, \ + FormatSpecDecimal +from src.core.format_spec.token import Token, TokenType +from src.parser.error import ParsingError +from src.pebble import Pebble + + +class FormatSpecParser: + TYPES: set[TokenType] = { + TokenType.T_STR, + TokenType.T_BIN, + TokenType.T_DEC, + TokenType.T_OCT, + TokenType.T_HEX, + TokenType.T_SCI, + TokenType.T_FIX, + TokenType.T_PCT, + } + + def __init__(self, tokens: list[Token]): + self.tokens: list[Token] = tokens + self.current: int = 0 + self.length: int = len(self.tokens) + + @staticmethod + def error(token: Token, msg: str): + Pebble.token_error(token, msg) + return ParsingError() + + def parse(self) -> FormatSpec: + return self.spec() + + def is_at_end(self) -> bool: + return self.peek().type == TokenType.EOF + + def peek(self) -> Token: + return self.tokens[self.current] + + def previous(self) -> Token: + return self.tokens[self.current - 1] + + def check(self, token_type: TokenType) -> bool: + if self.is_at_end(): + return False + return self.peek().type == token_type + + def advance(self): + token: Token = self.peek() + self.current += 1 + return token + + def match(self, *types: TokenType) -> bool: + for token_type in types: + if self.check(token_type): + self.advance() + return True + return False + + def consume(self, token_type: TokenType, error_msg: str) -> Token: + if self.check(token_type): + return self.advance() + raise self.error(self.peek(), error_msg) + + # Parsing + def spec(self) -> FormatSpec: + options: FormatSpecOptions = self.options() + number: FormatSpecNumber = self.number() + type: Optional[Token] = self.type() + return FormatSpec( + options=options, + number=number, + type=type + ) + + def options(self) -> FormatSpecOptions: + sign: Optional[Token] = None + if self.match(TokenType.PLUS, TokenType.MINUS, TokenType.SPACE): + sign = self.previous() + + return FormatSpecOptions( + sign=sign + ) + + def number(self) -> FormatSpecNumber: + integral: FormatSpecIntegral = self.integral() + decimal: FormatSpecDecimal = self.decimal() + return FormatSpecNumber(integral=integral, decimal=decimal) + + def integral(self) -> FormatSpecIntegral: + width: Optional[int] = None + grouping: Optional[Token] = None + if self.match(TokenType.NUMBER): + width = self.previous().value + if self.match(TokenType.COMMA, TokenType.UNDERSCORE): + grouping = self.previous().value + return FormatSpecIntegral( + width=width, + grouping=grouping + ) + + def decimal(self) -> FormatSpecDecimal: + precision: Optional[int] = None + grouping: Optional[Token] = None + if self.match(TokenType.DOT): + if self.match(TokenType.NUMBER): + precision = self.previous().value + if self.match(TokenType.COMMA, TokenType.UNDERSCORE): + grouping = self.previous().value + return FormatSpecDecimal( + precision=precision, + grouping=grouping + ) + + def type(self) -> Optional[Token]: + if self.match(*self.TYPES): + return self.previous() + return None diff --git a/src/core/format_spec/spec.py b/src/core/format_spec/spec.py new file mode 100644 index 0000000..da8a971 --- /dev/null +++ b/src/core/format_spec/spec.py @@ -0,0 +1,34 @@ +from dataclasses import dataclass +from typing import Optional + +from src.core.format_spec.token import Token + + +@dataclass(frozen=True) +class FormatSpecOptions: + sign: Optional[Token] + + +@dataclass(frozen=True) +class FormatSpecIntegral: + width: Optional[int] + grouping: Optional[Token] + + +@dataclass(frozen=True) +class FormatSpecDecimal: + precision: Optional[int] + grouping: Optional[Token] + + +@dataclass(frozen=True) +class FormatSpecNumber: + integral: FormatSpecIntegral + decimal: FormatSpecDecimal + + +@dataclass(frozen=True) +class FormatSpec: + options: FormatSpecOptions + number: FormatSpecNumber + type: Optional[Token] diff --git a/src/core/format_spec/string_formatter.py b/src/core/format_spec/string_formatter.py new file mode 100644 index 0000000..0935250 --- /dev/null +++ b/src/core/format_spec/string_formatter.py @@ -0,0 +1,30 @@ +from typing import Any + +from src.core.format_spec.spec import FormatSpec +from src.core.format_spec.token import TokenType, Token +from src.interpreter.error import PebbleRuntimeError + + +class StringFormatter: + @staticmethod + def stringify(obj: Any): + if obj is None: + return "null" + if obj is True: + return "true" + if obj is False: + return "false" + if isinstance(obj, (int, float)): + if obj.is_integer(): + obj = int(obj) + return str(obj) + return obj + + @staticmethod + def check_type(token: Token, obj: Any, expected_type: type | tuple[type, ...]): + if not isinstance(obj, expected_type): + raise PebbleRuntimeError(token, f"Invalid value type. Expected {expected_type}, got {type(obj)}") + + def format(self, obj: Any, spec: FormatSpec) -> str: + # TODO + return str(obj) diff --git a/src/core/format_spec/token.py b/src/core/format_spec/token.py index a9bdcba..d336d99 100644 --- a/src/core/format_spec/token.py +++ b/src/core/format_spec/token.py @@ -32,6 +32,7 @@ class TokenType(Enum): # Misc NUMBER = auto() + DOT = auto() EOF = auto() diff --git a/src/interpreter/interpreter.py b/src/interpreter/interpreter.py index f483365..3f700cf 100644 --- a/src/interpreter/interpreter.py +++ b/src/interpreter/interpreter.py @@ -6,6 +6,7 @@ from src.ast.stmt import Stmt, ExpressionStmt, LetStmt, BlockStmt, IfStmt, While ReturnStmt, BreakStmt, ContinueStmt, ClassStmt from src.consts import CONSTRUCTOR_NAME from src.core.callable import PebbleCallable +from src.core.format_spec.string_formatter import StringFormatter from src.core.function import PebbleFunction from src.core.instance import PebbleInstance from src.core.klass import PebbleClass @@ -186,7 +187,10 @@ class Interpreter(Expr.Visitor[Any], Stmt.Visitor[None]): ]) def visit_fstring_embed_expr(self, expr: FStringEmbedExpr) -> Any: - return self.stringify(self.evaluate(expr.expression)) + value: Any = self.evaluate(expr.expression) + if expr.spec is None: + return self.stringify(value) + return StringFormatter().format(value, expr.spec) def visit_variable_expr(self, expr: VariableExpr) -> Any: return self.look_up_variable(expr.name, expr) @@ -343,14 +347,4 @@ class Interpreter(Expr.Visitor[Any], Stmt.Visitor[None]): @staticmethod def stringify(obj: Any) -> str: - if obj is None: - return "null" - if obj is True: - return "true" - if obj is False: - return "false" - if isinstance(obj, (int, float)): - if obj.is_integer(): - obj = int(obj) - return str(obj) - return obj + return StringFormatter.stringify(obj) diff --git a/src/parser/parser.py b/src/parser/parser.py index ffe8983..a4a1f2b 100644 --- a/src/parser/parser.py +++ b/src/parser/parser.py @@ -5,6 +5,8 @@ from src.ast.expr import Expr, BinaryExpr, UnaryExpr, LiteralExpr, GroupingExpr, from src.ast.stmt import Stmt, ExpressionStmt, LetStmt, BlockStmt, IfStmt, WhileStmt, ForStmt, FunctionStmt, \ ReturnStmt, BreakStmt, ContinueStmt, ClassStmt from src.consts import MAX_FUNCTION_ARGS +from src.core.format_spec.parser import FormatSpecParser +from src.core.format_spec.spec import FormatSpec from src.parser.error import ParsingError from src.pebble import Pebble from src.token.token import Token, TokenType @@ -391,14 +393,17 @@ class Parser: def fstring(self) -> Expr: start: Token = self.previous() - parts: list[Expr] = [] + parts: list[LiteralExpr | FStringEmbedExpr] = [] while not self.check(TokenType.FSTRING_END) and not self.is_at_end(): if self.match(TokenType.LEFT_BRACE): brace: Token = self.previous() expr: Expr = self.expression() + spec: Optional[FormatSpec] = None + if self.match(TokenType.FORMAT_SPEC): + spec = FormatSpecParser(self.previous().value).parse() self.consume(TokenType.RIGHT_BRACE, "Expected '}' after f-string embed") - parts.append(FStringEmbedExpr(brace, expr, self.previous())) + parts.append(FStringEmbedExpr(brace, expr, spec, self.previous())) else: self.consume(TokenType.FSTRING_TEXT, "Unexpected token") parts.append(LiteralExpr(self.previous().value))