diff --git a/parser/base.py b/parser/base.py
new file mode 100644
index 0000000..2195f72
--- /dev/null
+++ b/parser/base.py
@@ -0,0 +1,163 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Generic, TypeVar
+
+from lexer.token import Token, TokenType
+from parser.errors import ParsingError
+
+
+@dataclass(frozen=True)
+class TokenError:
+    """A parsing error linked to a particular token"""
+
+    token: Token
+    message: str
+
+    def get_report(self) -> str:
+        """Get a detailed error message
+
+        Returns:
+            str: the complete error message
+        """
+        where: str = f"'{self.token.lexeme}'"
+        if self.token.type == TokenType.EOF:
+            where = "end"
+        return f"({self.token.position}) Error at {where}: {self.message}"
+
+
+T = TypeVar("T")
+
+
+class Parser(ABC, Generic[T]):
+    """An abstract parser which provides methods to easily extend it into a concrete one
+
+    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
+    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)
+
+    [1]: https://craftinginterpreters.com/
+    """
+
+    IGNORE: set[TokenType] = {
+        TokenType.WHITESPACE,
+        TokenType.COMMENT,
+        TokenType.NEWLINE,
+    }
+
+    def __init__(self, tokens: list[Token]) -> None:
+        """Create a new parser to parse the given tokens
+
+        Args:
+            tokens (list[Token]): the tokens to parse, those in IGNORE are dropped
+        """
+        self.tokens: list[Token] = [
+            token for token in tokens if token.type not in self.IGNORE
+        ]
+        self.current: int = 0
+        self.length: int = len(self.tokens)
+        self.errors: list[TokenError] = []  # filled by error()
+
+    def error(self, token: Token, message: str) -> ParsingError:
+        """Record an error
+
+        Args:
+            token (Token): the token at which the error was detected
+            message (str): a message explaining the error
+
+        Returns:
+            ParsingError: the parsing error to raise
+        """
+        self.errors.append(TokenError(token=token, message=message))
+        return ParsingError()
+
+    @abstractmethod
+    def parse(self) -> T:
+        """Parse the tokens
+
+        Returns:
+            T: the parsed element(s)
+        """
+        pass
+
+    def is_at_end(self) -> bool:
+        """Whether the parser is at the end of the token list
+
+        Returns:
+            bool: True if the current token is the EOF token
+        """
+        return self.peek().type == TokenType.EOF
+
+    def peek(self) -> Token:
+        """Get the current token without advancing
+
+        Returns:
+            Token: the current token
+        """
+        return self.tokens[self.current]
+
+    def previous(self) -> Token:
+        """Get the previous token
+
+        This function is unsafe: when the parser is at the beginning of the token
+        list, index -1 wraps around and the last token is returned instead
+
+        Returns:
+            Token: the previous token
+        """
+        return self.tokens[self.current - 1]
+
+    def check(self, token_type: TokenType) -> bool:
+        """Check whether the current token is of the given type
+
+        This function always returns False if the parser is at the EOF token
+
+        Args:
+            token_type (TokenType): the type of token to check
+
+        Returns:
+            bool: True if the current token is of the given type and not EOF
+        """
+        if self.is_at_end():
+            return False
+        return self.peek().type == token_type
+
+    def advance(self) -> Token:
+        """Consume and return the current token, if not at the EOF
+
+        Returns:
+            Token: the consumed token, or the previous token if already at the EOF
+        """
+        if not self.is_at_end():
+            self.current += 1
+        return self.previous()
+
+    def match(self, *types: TokenType) -> bool:
+        """Consume the current token if it matches one of the given types
+
+        Returns:
+            bool: whether a token was matched and consumed
+        """
+        for token_type in types:
+            if self.check(token_type):
+                self.advance()
+                return True
+        return False
+
+    def consume(self, token_type: TokenType, error_msg: str) -> Token:
+        """Consume the current token if it matches the given type or raise an error
+
+        If the current token doesn't match the given type, an error is recorded
+        and raised with the provided message
+
+        Args:
+            token_type (TokenType): the expected token type
+            error_msg (str): the error message if the token doesn't match
+
+        Raises:
+            ParsingError: if the current token doesn't match the given type
+
+        Returns:
+            Token: the current token which matched the given type
+        """
+        if self.check(token_type):
+            return self.advance()
+        raise self.error(self.peek(), error_msg)
diff --git a/parser/errors.py b/parser/errors.py
new file mode 100644
index 0000000..e8e65fb
--- /dev/null
+++ b/parser/errors.py
@@ -0,0 +1,2 @@
+class ParsingError(RuntimeError):
+    """Raised by the parser when the current token doesn't match what is expected"""