Files
pebble/src/parser/parser.py

231 lines
7.7 KiB
Python

from typing import Optional
from src.ast.expr import Expr, BinaryExpr, UnaryExpr, LiteralExpr, GroupingExpr, VariableExpr, AssignExpr
from src.ast.stmt import Stmt, PrintStmt, ExpressionStmt, LetStmt, BlockStmt, IfStmt
from src.parser.error import ParsingError
from src.pebble import Pebble
from src.token import Token, TokenType
class Parser:
    """Recursive-descent parser that turns a Pebble token list into statements.

    Statements are terminated by a NEWLINE token or by the end of input.
    Syntax errors are reported through ``Pebble.token_error`` and recovered
    from with ``synchronize`` so that parsing can continue with the next
    statement.

    Operator precedence, lowest to highest: assignment, equality,
    comparison, term, factor, unary, primary.
    """

    # Token types removed from the stream before parsing starts.
    IGNORE: set[TokenType] = {
        TokenType.WHITESPACE, TokenType.COMMENT
    }
    # Token types at which error recovery stops discarding input.
    STATEMENT_BOUNDARY: set[TokenType] = {
        TokenType.FOR, TokenType.WHILE, TokenType.IF, TokenType.PRINT
    }

    def __init__(self):
        self.tokens: list[Token] = []   # filtered token stream being parsed
        self.current: int = 0           # index of the next token to consume
        self.length: int = 0            # len(self.tokens), cached by parse()

    @staticmethod
    def error(token: Token, msg: str) -> ParsingError:
        """Report ``msg`` at ``token`` and return a ParsingError to raise.

        The exception is returned rather than raised so the caller decides
        whether to unwind (``raise self.error(...)``) or merely report.
        """
        Pebble.token_error(token, msg)
        return ParsingError()

    def parse(self, tokens: list[Token]) -> list[Stmt]:
        """Parse ``tokens`` and return the statements that parsed cleanly.

        Tokens whose type is in ``IGNORE`` are dropped first. Declarations
        that fail to parse are reported, skipped via ``synchronize`` and
        left out of the result.
        """
        self.tokens = [tok for tok in tokens if tok.type not in self.IGNORE]
        self.current = 0
        self.length = len(self.tokens)

        statements: list[Stmt] = []
        self.skip_newlines()
        while not self.is_at_end():
            stmt = self.declaration()
            if stmt is not None:
                statements.append(stmt)
            # Skip blank lines *before* the loop condition is re-tested so
            # trailing newlines never make declaration() run on EOF.
            self.skip_newlines()
        return statements

    # Token-stream helpers

    def skip_newlines(self):
        """Consume any run of consecutive NEWLINE tokens."""
        while self.check(TokenType.NEWLINE):
            self.advance()

    def is_at_end(self) -> bool:
        """Return True when the cursor is on EOF or past the last token."""
        # The bounds guard keeps an empty or EOF-less token list from
        # raising IndexError.
        return (
            self.current >= self.length
            or self.tokens[self.current].type == TokenType.EOF
        )

    def peek(self) -> Token:
        """Return the current token without consuming it.

        The cursor must be inside the token list; ``advance`` never moves
        it past an EOF token.
        """
        return self.tokens[self.current]

    def previous(self) -> Token:
        """Return the token immediately before the cursor."""
        return self.tokens[self.current - 1]

    def check(self, token_type: TokenType) -> bool:
        """Return True if the current token has ``token_type``.

        Always False at end of input, so EOF itself can never be matched.
        """
        if self.is_at_end():
            return False
        return self.peek().type == token_type

    def advance(self) -> Token:
        """Consume and return the current token.

        At end of input the cursor stays on EOF: stepping past it would make
        every later ``peek`` raise IndexError (for example during error
        recovery after an error reported at EOF).
        """
        token: Token = self.peek()
        if not self.is_at_end():
            self.current += 1
        return token

    def match(self, *types: TokenType) -> bool:
        """Consume the current token if its type is one of ``types``."""
        for token_type in types:
            if self.check(token_type):
                self.advance()
                return True
        return False

    def consume(self, token_type: TokenType, error_msg: str) -> Token:
        """Consume and return a token of ``token_type``.

        Raises:
            ParsingError: if the current token has a different type; the
                error is reported with ``error_msg`` first.
        """
        if self.check(token_type):
            return self.advance()
        raise self.error(self.peek(), error_msg)

    def expect_eol(self, error_msg: str):
        """Require the statement to end here (NEWLINE or end of input).

        A NEWLINE is consumed; end of input is accepted without consuming.

        Raises:
            ParsingError: if any other token follows.
        """
        if self.is_at_end() or self.match(TokenType.NEWLINE):
            return
        raise self.error(self.peek(), error_msg)

    # Parsing

    def synchronize(self):
        """Discard tokens after a syntax error until a likely statement start.

        Stops just after a NEWLINE, just before a token in
        ``STATEMENT_BOUNDARY``, or at end of input.
        """
        self.advance()
        while not self.is_at_end():
            if self.previous().type == TokenType.NEWLINE:
                return
            if self.peek().type in self.STATEMENT_BOUNDARY:
                return
            self.advance()

    def declaration(self) -> Optional[Stmt]:
        """Parse a ``let`` declaration or any other statement.

        Returns None when a ParsingError was raised; the parser has then
        already resynchronized.
        """
        try:
            if self.match(TokenType.LET):
                return self.var_declaration()
            return self.statement()
        except ParsingError:
            self.synchronize()
            return None

    def var_declaration(self) -> Stmt:
        """Parse ``IDENTIFIER ("=" expression)? EOL``; ``let`` is already consumed."""
        name: Token = self.consume(TokenType.IDENTIFIER, "Expected variable name.")
        initializer: Optional[Expr] = None
        if self.match(TokenType.EQUAL):
            initializer = self.expression()
        self.expect_eol("Expected end of line after variable initialization")
        return LetStmt(name, initializer)

    def statement(self) -> Stmt:
        """Dispatch on the leading token: if, print, block, else expression."""
        if self.match(TokenType.IF):
            return self.if_stmt()
        if self.match(TokenType.PRINT):
            return self.print_stmt()
        if self.match(TokenType.LEFT_BRACE):
            return self.block_stmt()
        return self.expression_stmt()

    def if_stmt(self) -> Stmt:
        """Parse ``expression statement ("else" statement)?``; ``if`` is consumed.

        ``else`` is only recognised when it is the very next token after the
        then-branch.
        """
        condition: Expr = self.expression()
        then_branch: Stmt = self.statement()
        else_branch: Optional[Stmt] = None
        if self.match(TokenType.ELSE):
            else_branch = self.statement()
        return IfStmt(condition, then_branch, else_branch)

    def print_stmt(self) -> Stmt:
        """Parse ``"(" expression ")" EOL``; ``print`` is already consumed."""
        self.consume(TokenType.LEFT_PAREN, "Missing parentheses")
        value: Expr = self.expression()
        self.consume(TokenType.RIGHT_PAREN, "Unclosed parenthesis")
        self.expect_eol("Expected end of line after statement")
        return PrintStmt(value)

    def block_stmt(self) -> Stmt:
        """Parse declarations up to the closing ``}``; ``{`` is already consumed.

        Declarations that fail to parse are left out of the block.

        Raises:
            ParsingError: if input ends before a ``}`` is found.
        """
        statements: list[Stmt] = []
        self.skip_newlines()
        while not self.check(TokenType.RIGHT_BRACE) and not self.is_at_end():
            stmt = self.declaration()
            if stmt is not None:
                statements.append(stmt)
            # Skip blank lines before re-testing for '}' so a blank line
            # ahead of the closing brace is not parsed as a statement.
            self.skip_newlines()
        self.consume(TokenType.RIGHT_BRACE, "Expected '}' after block.")
        return BlockStmt(statements)

    def expression_stmt(self) -> Stmt:
        """Parse ``expression EOL``."""
        value: Expr = self.expression()
        self.expect_eol("Expected end of line after expression")
        return ExpressionStmt(value)

    def expression(self) -> Expr:
        """Parse an expression, starting at the lowest precedence level."""
        return self.assignment()

    def assignment(self) -> Expr:
        """Parse a right-associative ``=``, ``+=``, ``-=``, ``*=`` or ``/=``.

        The left-hand side is parsed as an ordinary expression and must turn
        out to be a VariableExpr. Otherwise "Invalid assignment target." is
        reported (not raised) and the left-hand expression is returned.
        """
        expr: Expr = self.equality()
        if not self.match(
            TokenType.EQUAL,
            TokenType.PLUS_EQUAL,
            TokenType.MINUS_EQUAL,
            TokenType.STAR_EQUAL,
            TokenType.SLASH_EQUAL,
        ):
            return expr

        operator: Token = self.previous()
        value: Expr = self.assignment()

        if not isinstance(expr, VariableExpr):
            self.error(operator, "Invalid assignment target.")
            return expr

        name: Token = expr.name
        if operator.type == TokenType.EQUAL:
            return AssignExpr(name, value)
        # Compound form: `x op= v` becomes `x = x <op=> v`. The compound
        # operator token itself is handed to BinaryExpr.
        # NOTE(review): presumably the evaluator maps PLUS_EQUAL etc. to the
        # matching arithmetic operation - confirm.
        return AssignExpr(name, BinaryExpr(VariableExpr(name), operator, value))

    def _left_assoc_binary(self, operand, *types: TokenType) -> Expr:
        # Parse `operand (op operand)*` for the given operator types, left-associative.
        expr: Expr = operand()
        while self.match(*types):
            operator: Token = self.previous()
            right: Expr = operand()
            expr = BinaryExpr(expr, operator, right)
        return expr

    def equality(self) -> Expr:
        """Parse ``comparison (("!=" | "==") comparison)*``."""
        return self._left_assoc_binary(
            self.comparison, TokenType.BANG_EQUAL, TokenType.EQUAL_EQUAL
        )

    def comparison(self) -> Expr:
        """Parse ``term (("<" | "<=" | ">" | ">=") term)*``."""
        return self._left_assoc_binary(
            self.term,
            TokenType.LESS,
            TokenType.LESS_EQUAL,
            TokenType.GREATER,
            TokenType.GREATER_EQUAL,
        )

    def term(self) -> Expr:
        """Parse ``factor (("+" | "-") factor)*``."""
        return self._left_assoc_binary(
            self.factor, TokenType.PLUS, TokenType.MINUS
        )

    def factor(self) -> Expr:
        """Parse ``unary (("*" | "/") unary)*``."""
        return self._left_assoc_binary(
            self.unary, TokenType.STAR, TokenType.SLASH
        )

    def unary(self) -> Expr:
        """Parse ``("!" | "-") unary`` or fall through to ``primary``."""
        if self.match(TokenType.BANG, TokenType.MINUS):
            operator: Token = self.previous()
            right: Expr = self.unary()
            return UnaryExpr(operator, right)
        return self.primary()

    def primary(self) -> Expr:
        """Parse a literal, an identifier or a parenthesised expression.

        Raises:
            ParsingError: if the current token cannot start an expression,
                or a parenthesised expression is not closed.
        """
        if self.match(TokenType.FALSE):
            return LiteralExpr(False)
        if self.match(TokenType.TRUE):
            return LiteralExpr(True)
        if self.match(TokenType.NULL):
            return LiteralExpr(None)
        if self.match(TokenType.NUMBER, TokenType.STRING):
            return LiteralExpr(self.previous().value)
        if self.match(TokenType.IDENTIFIER):
            return VariableExpr(self.previous())
        if self.match(TokenType.LEFT_PAREN):
            expr: Expr = self.expression()
            self.consume(TokenType.RIGHT_PAREN, "Unclosed parenthesis")
            return GroupingExpr(expr)
        raise self.error(self.peek(), "Expected expression")