from typing import NoReturn

from src.ast.expr import Expr, BinaryExpr, UnaryExpr, LiteralExpr, GroupingExpr
from src.token import Token, TokenType


class Parser:
    """Recursive-descent parser turning a token stream into an expression AST.

    Grammar, loosest-binding first (each rule delegates to the next):

        expression -> equality
        equality   -> comparison ( ( "!=" | "==" ) comparison )*
        comparison -> term ( ( "<" | "<=" | ">" | ">=" ) term )*
        term       -> factor ( ( "+" | "-" ) factor )*
        factor     -> unary ( ( "*" | "/" ) unary )*
        unary      -> ( "!" | "-" ) unary | primary
        primary    -> NUMBER | STRING | "true" | "false" | "null"
                    | "(" expression ")"
    """

    # Token types with no syntactic meaning; stripped before parsing.
    IGNORE: set[TokenType] = {
        TokenType.WHITESPACE, TokenType.COMMENT,
    }

    def __init__(self) -> None:
        self.tokens: list[Token] = []  # filtered token stream for this run
        self.current: int = 0          # index of the next unconsumed token
        self.length: int = 0           # token count after filtering

    def error(self, token: Token, msg: str) -> NoReturn:
        """Raise a SyntaxError located at *token* with a human-readable hint."""
        lexeme: str = "end" if token.type == TokenType.EOF else f"'{token.lexeme}'"
        raise SyntaxError(f"[ERROR] Invalid syntax at {lexeme} ({token.position}): {msg}")

    def process(self, tokens: list[Token]) -> Expr:
        """Parse *tokens* into a single expression AST.

        Whitespace and comment tokens are filtered out first. Raises
        SyntaxError on malformed input, and also when tokens remain after the
        expression — previously trailing garbage (e.g. ``1 + 2 3``) was
        silently dropped and a partial AST returned.
        """
        self.tokens = [t for t in tokens if t.type not in self.IGNORE]
        self.current = 0
        self.length = len(self.tokens)

        expr: Expr = self.expression()
        # FIX: reject leftover tokens instead of silently ignoring them.
        # An EOF sentinel (if the lexer emits one) is allowed to remain.
        if not self.is_at_end() and self.peek().type != TokenType.EOF:
            self.error(self.peek(), "Unexpected token after expression")
        return expr

    # ---- Token-stream helpers -------------------------------------------

    def is_at_end(self) -> bool:
        """True once every filtered token has been consumed."""
        return self.current >= self.length

    def peek(self) -> Token:
        """Return the next token without consuming it."""
        return self.tokens[self.current]

    def previous(self) -> Token:
        """Return the most recently consumed token."""
        return self.tokens[self.current - 1]

    def check(self, token_type: TokenType) -> bool:
        """True if the next token has type *token_type*; never consumes."""
        if self.is_at_end():
            return False
        return self.peek().type == token_type

    def advance(self) -> Token:
        """Consume and return the next token.

        FIX: the cursor is only incremented while tokens remain, so a stream
        whose final token is consumed cannot push ``current`` past the end.
        """
        token: Token = self.peek()
        if not self.is_at_end():
            self.current += 1
        return token

    def match(self, *types: TokenType) -> bool:
        """Consume the next token and return True if it matches any of *types*."""
        for token_type in types:
            if self.check(token_type):
                self.advance()
                return True
        return False

    def consume(self, token_type: TokenType, error_msg: str) -> None:
        """Require and consume a token of *token_type*; raise with *error_msg* otherwise."""
        if not self.match(token_type):
            self.error(self.peek(), error_msg)

    # ---- Parsing (one method per grammar rule) --------------------------

    def expression(self) -> Expr:
        """expression -> equality"""
        return self.equality()

    def equality(self) -> Expr:
        """equality -> comparison ( ( "!=" | "==" ) comparison )*  (left-assoc)"""
        expr: Expr = self.comparison()
        while self.match(TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL):
            operator: Token = self.previous()
            right: Expr = self.comparison()
            expr = BinaryExpr(expr, operator, right)
        return expr

    def comparison(self) -> Expr:
        """comparison -> term ( ( "<" | "<=" | ">" | ">=" ) term )*  (left-assoc)"""
        expr: Expr = self.term()
        while self.match(TokenType.LESS, TokenType.LESS_EQUAL, TokenType.GREATER, TokenType.GREATER_EQUAL):
            operator: Token = self.previous()
            right: Expr = self.term()
            expr = BinaryExpr(expr, operator, right)
        return expr

    def term(self) -> Expr:
        """term -> factor ( ( "+" | "-" ) factor )*  (left-assoc)"""
        expr: Expr = self.factor()
        while self.match(TokenType.PLUS, TokenType.MINUS):
            operator: Token = self.previous()
            right: Expr = self.factor()
            expr = BinaryExpr(expr, operator, right)
        return expr

    def factor(self) -> Expr:
        """factor -> unary ( ( "*" | "/" ) unary )*  (left-assoc)"""
        expr: Expr = self.unary()
        while self.match(TokenType.STAR, TokenType.SLASH):
            operator: Token = self.previous()
            right: Expr = self.unary()
            expr = BinaryExpr(expr, operator, right)
        return expr

    def unary(self) -> Expr:
        """unary -> ( "!" | "-" ) unary | primary  (right-assoc via recursion)"""
        if self.match(TokenType.BANG, TokenType.MINUS):
            operator: Token = self.previous()
            right: Expr = self.unary()
            return UnaryExpr(operator, right)
        return self.primary()

    def primary(self) -> Expr:
        """primary -> literal | "(" expression ")" — raises on anything else."""
        if self.match(TokenType.FALSE):
            return LiteralExpr(False)
        if self.match(TokenType.TRUE):
            return LiteralExpr(True)
        if self.match(TokenType.NULL):
            return LiteralExpr(None)

        if self.match(TokenType.NUMBER, TokenType.STRING):
            # The lexer stores the converted literal on the token itself.
            return LiteralExpr(self.previous().value)

        if self.match(TokenType.LEFT_PAREN):
            expr: Expr = self.expression()
            self.consume(TokenType.RIGHT_PAREN, "Unclosed parenthesis")
            return GroupingExpr(expr)

        self.error(self.peek(), "Malformed expression")