Files
midas/parser/base.py

184 lines
5.3 KiB
Python

from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Generic, TypeVar
from lexer.token import Token, TokenType
from parser.errors import ParsingError
@dataclass(frozen=True)
class TokenError:
    """A parsing error attached to the token at which it was detected"""

    token: Token
    message: str

    def get_report(self) -> str:
        """Build the full, human-readable error message

        The report has the form `(<position>) Error at <where>: <message>`,
        where `<where>` is the quoted lexeme of the token, or `end` when the
        token is the EOF token.

        Returns:
            str: the complete error message
        """
        token = self.token
        quoted_lexeme = f"'{token.lexeme}'"
        # The EOF token is reported as "end" rather than by its lexeme
        location = "end" if token.type == TokenType.EOF else quoted_lexeme
        return f"({token.position}) Error at {location}: {self.message}"
# Type of the value returned by a concrete parser's `parse` method
T = TypeVar("T")
class Parser(ABC, Generic[T]):
    """An abstract parser which provides methods to easily extend it into a concrete one

    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)

    The navigation helpers rely on the token list ending with an EOF token:
    `is_at_end` only recognises the end of input through that token, and
    `peek` raises an IndexError if the current index runs past the list.

    [1]: https://craftinginterpreters.com/
    """

    # Token types which are dropped from the token list before parsing
    IGNORE: set[TokenType] = {
        TokenType.WHITESPACE,
        TokenType.COMMENT,
        TokenType.NEWLINE,
    }

    def __init__(self, tokens: list[Token]) -> None:
        """Create a new parser to parse the given tokens

        Tokens whose type is listed in `IGNORE` are discarded; the given
        list itself is left untouched.

        Args:
            tokens (list[Token]): the tokens to parse
        """
        self.tokens: list[Token] = [
            token for token in tokens if token.type not in self.IGNORE
        ]
        self.current: int = 0
        self.length: int = len(self.tokens)
        self.errors: list[TokenError] = []

    def error(self, token: Token, message: str) -> ParsingError:
        """Record an error

        The error is appended to `self.errors`; the returned exception is
        not raised here, it is up to the caller to raise it.

        Args:
            token (Token): the token at which the error was detected
            message (str): a message explaining the error

        Returns:
            ParsingError: the parsing error to raise
        """
        self.errors.append(TokenError(token=token, message=message))
        return ParsingError()

    @abstractmethod
    def parse(self) -> T:
        """Parse the tokens

        Returns:
            T: the parsed element(s)
        """

    def is_at_end(self) -> bool:
        """Whether the parser is at the end of the token list

        Returns:
            bool: True if the current token is the EOF token
        """
        return self.peek().type == TokenType.EOF

    def peek(self) -> Token:
        """Get the current token without advancing

        Raises:
            IndexError: if the current index is outside of the token list
                (e.g. the list is empty)

        Returns:
            Token: the current token
        """
        return self.tokens[self.current]

    def previous(self) -> Token:
        """Get the previous token

        This function is unsafe: when the parser is at the beginning of the
        token list, the index is -1 and the *last* token of the list is
        returned instead of an error being raised. An IndexError is only
        raised if the token list is empty.

        Returns:
            Token: the previous token
        """
        return self.tokens[self.current - 1]

    def check(self, token_type: TokenType) -> bool:
        """Check whether the current token is of the given type

        This function always returns False if the parser is at the EOF token

        Args:
            token_type (TokenType): the type of token to check

        Returns:
            bool: True if the current token is of the given type and not EOF
        """
        if self.is_at_end():
            return False
        return self.peek().type == token_type

    def check_next(self, token_type: TokenType) -> bool:
        """Check whether the next token is of the given type

        This function always returns False if the parser is at the EOF token,
        if there is no next token, or if the next token is the EOF token

        Args:
            token_type (TokenType): the type of token to check

        Returns:
            bool: True if the next token is of the given type and not EOF
        """
        if self.is_at_end():
            return False

        next_index: int = self.current + 1
        if next_index >= self.length:
            return False

        next_type = self.tokens[next_index].type
        # EOF never matches, even when EOF is the requested type
        return next_type != TokenType.EOF and next_type == token_type

    def advance(self) -> Token:
        """Consume and return the current token, if not at the EOF

        If the parser is already at the EOF token, nothing is consumed and
        the token located before the EOF token is returned (see `previous`).

        Returns:
            Token: the current token, before advancing
        """
        if not self.is_at_end():
            self.current += 1
        return self.previous()

    def match(self, *types: TokenType) -> bool:
        """Consume the current token if it matches one of the given types

        Returns:
            bool: whether a token was matched and consumed
        """
        if any(self.check(token_type) for token_type in types):
            self.advance()
            return True
        return False

    def consume(self, token_type: TokenType, error_msg: str) -> Token:
        """Consume the current token if it matches the given type or raise an error

        If the current token doesn't match the given type, an error is
        recorded in `self.errors` with the provided message and raised.
        Since `check` never matches at the EOF token, trying to consume
        `TokenType.EOF` always fails.

        Args:
            token_type (TokenType): the expected token type
            error_msg (str): the error message if the token doesn't match

        Raises:
            ParsingError: if the current token doesn't match the given type

        Returns:
            Token: the current token which matched the given type
        """
        if self.check(token_type):
            return self.advance()
        raise self.error(self.peek(), error_msg)