From 6482e06bcaa42cae3e18d52b0fbe9ebf0fb50ef0 Mon Sep 17 00:00:00 2001
From: LordBaryhobal
Date: Thu, 14 May 2026 02:19:50 +0200
Subject: [PATCH] feat(parser): add base Midas parser (incomplete)

---
 core/ast/midas.py   | 108 ++++++++++++++++++++++++++++++++++++++++++++
 core/ast/printer.py |  57 +++++++++++++++++++++++
 parser/midas.py     |  99 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 264 insertions(+)
 create mode 100644 core/ast/midas.py
 create mode 100644 parser/midas.py

diff --git a/core/ast/midas.py b/core/ast/midas.py
new file mode 100644
index 0000000..1c85f66
--- /dev/null
+++ b/core/ast/midas.py
@@ -0,0 +1,108 @@
+"""AST node definitions used by the Midas parser."""
+
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Generic, Optional, TypeVar
+
+from lexer.token import Token
+
+T = TypeVar("T")
+
+
+# Statements
+
+
+@dataclass(frozen=True)
+class Stmt(ABC):
+    """Abstract base class of all statement nodes."""
+
+    @abstractmethod
+    def accept(self, visitor: Visitor[T]) -> T:
+        """Dispatch to the ``visit_*`` method of *visitor* matching this node."""
+
+    class Visitor(ABC, Generic[T]):
+        """Visitor interface over statement nodes, producing a ``T``."""
+
+        @abstractmethod
+        def visit_type_stmt(self, stmt: TypeStmt) -> T: ...
+
+        @abstractmethod
+        def visit_property_stmt(self, stmt: PropertyStmt) -> T: ...
+
+
+@dataclass(frozen=True)
+class TypeStmt(Stmt):
+    """Type declaration: a name, its base type expressions and an optional body."""
+
+    name: Token
+    bases: list[TypeExpr]
+    body: Optional[TypeBodyExpr]
+
+    def accept(self, visitor: Stmt.Visitor[T]) -> T:
+        return visitor.visit_type_stmt(self)
+
+
+@dataclass(frozen=True)
+class PropertyStmt(Stmt):
+    """Property declaration: a name paired with a type expression."""
+
+    name: Token
+    type: TypeExpr
+
+    def accept(self, visitor: Stmt.Visitor[T]) -> T:
+        return visitor.visit_property_stmt(self)
+
+
+# Expressions
+
+
+@dataclass(frozen=True)
+class Expr(ABC):
+    """Abstract base class of all expression nodes."""
+
+    @abstractmethod
+    def accept(self, visitor: Visitor[T]) -> T:
+        """Dispatch to the ``visit_*`` method of *visitor* matching this node."""
+
+    class Visitor(ABC, Generic[T]):
+        """Visitor interface over expression nodes, producing a ``T``."""
+
+        @abstractmethod
+        def visit_type_expr(self, expr: TypeExpr) -> T: ...
+
+        @abstractmethod
+        def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ...
+
+        @abstractmethod
+        def visit_type_body_expr(self, expr: TypeBodyExpr) -> T: ...
+
+
+@dataclass(frozen=True)
+class TypeExpr(Expr):
+    """Type reference: a name plus the constraints attached to it."""
+
+    name: Token
+    constraints: list[ConstraintExpr]
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_type_expr(self)
+
+
+@dataclass(frozen=True)
+class ConstraintExpr(Expr):
+    """Constraint on a type expression (no fields yet)."""
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_constraint_expr(self)
+
+
+@dataclass(frozen=True)
+class TypeBodyExpr(Expr):
+    """Body of a type declaration: its list of properties."""
+
+    properties: list[PropertyStmt]
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_type_body_expr(self)
diff --git a/core/ast/printer.py b/core/ast/printer.py
index 4394834..8c873eb 100644
--- a/core/ast/printer.py
+++ b/core/ast/printer.py
@@ -6,6 +6,7 @@ import io
 from typing import Generator, Generic, Optional, Protocol, TypeVar
 
 import core.ast.annotations as a
+import core.ast.midas as m
 
 
 class _Level(Enum):
@@ -131,3 +132,59 @@ class AnnotationPrinter(a.Expr.Visitor[str]):
         else:
             parts.append(expr.type.accept(self))
         return ": ".join(parts)
+
+
+class MidasAstPrinter(AstPrinter, m.Expr.Visitor[None], m.Stmt.Visitor[None]):
+    """Printer for Midas statement and expression nodes."""
+
+    def visit_type_stmt(self, stmt: m.TypeStmt) -> None:
+        """Write a type statement: its name, its bases, then its optional body."""
+        self._write_line("TypeStmt")
+        with self._child_level():
+            self._write_line(f'name: "{stmt.name.lexeme}"')
+            self._write_line("bases")
+            with self._child_level():
+                for i, base in enumerate(stmt.bases):
+                    self._idx = i
+                    if i == len(stmt.bases) - 1:
+                        self._mark_last()
+                    base.accept(self)
+            self._write_optional_child("body", stmt.body, last=True)
+
+    def visit_property_stmt(self, stmt: m.PropertyStmt) -> None:
+        """Write a property statement: its name, then its type expression."""
+        self._write_line("PropertyStmt")
+        with self._child_level():
+            self._write_line(f'name: "{stmt.name.lexeme}"')
+            self._write_line("type")
+            with self._child_level():
+                stmt.type.accept(self)
+
+    def visit_type_expr(self, expr: m.TypeExpr) -> None:
+        """Write a type expression: its name, then each of its constraints."""
+        self._write_line("TypeExpr")
+        with self._child_level():
+            self._write_line(f'name: "{expr.name.lexeme}"')
+            self._write_line("constraints")
+            with self._child_level():
+                for i, constraint in enumerate(expr.constraints):
+                    self._idx = i
+                    if i == len(expr.constraints) - 1:
+                        self._mark_last()
+                    constraint.accept(self)
+
+    def visit_constraint_expr(self, expr: m.ConstraintExpr) -> None:
+        """Write the node label only; constraints carry no fields yet."""
+        self._write_line("ConstraintExpr")
+
+    def visit_type_body_expr(self, expr: m.TypeBodyExpr) -> None:
+        """Write a type body: each of its properties."""
+        self._write_line("TypeBodyExpr")
+        with self._child_level():
+            self._write_line("properties")
+            with self._child_level():
+                for i, prop in enumerate(expr.properties):
+                    self._idx = i
+                    if i == len(expr.properties) - 1:
+                        self._mark_last()
+                    prop.accept(self)
diff --git a/parser/midas.py b/parser/midas.py
new file mode 100644
index 0000000..fd02c04
--- /dev/null
+++ b/parser/midas.py
@@ -0,0 +1,99 @@
+"""Parser building Midas AST statements from tokens."""
+
+from typing import Optional
+
+from core.ast.midas import ConstraintExpr, Stmt, TypeBodyExpr, TypeExpr, TypeStmt
+from lexer.token import Token, TokenType
+from parser.base import Parser
+from parser.errors import ParsingError
+
+
+class MidasParser(Parser):
+    """Parser for Midas declarations (currently TYPE declarations only)."""
+
+    # Token types before which synchronize() stops skipping.
+    SYNC_BOUNDARY: set[TokenType] = {TokenType.TYPE, TokenType.OP, TokenType.CONSTRAINT}
+
+    def parse(self) -> list[Stmt]:
+        """Parse declarations until the input ends or one yields no statement.
+
+        Returns:
+            The statements parsed before stopping.
+        """
+        statements: list[Stmt] = []
+        while not self.is_at_end():
+            stmt: Optional[Stmt] = self.declaration()
+            if stmt is None:
+                # None means a ParsingError was recovered from, or no declaration
+                # keyword matched. Assuming match() leaves an unmatched token in
+                # place, retrying in the latter case would never make progress.
+                print("Early stop")
+                break
+            statements.append(stmt)
+        return statements
+
+    def synchronize(self) -> None:
+        """Skip tokens so parsing can resume after a ParsingError.
+
+        Calls advance() once unconditionally, then keeps advancing until the
+        previous token is a NEWLINE, the next one is in SYNC_BOUNDARY, or the
+        input ends.
+        """
+        self.advance()
+        while not self.is_at_end():
+            if self.previous().type == TokenType.NEWLINE:
+                return
+            if self.peek().type in self.SYNC_BOUNDARY:
+                return
+            self.advance()
+
+    def declaration(self) -> Optional[Stmt]:
+        """Parse one declaration.
+
+        Returns:
+            The parsed statement, or None when no declaration keyword matches
+            or a ParsingError was caught (synchronize() is called first).
+        """
+        try:
+            if self.match(TokenType.TYPE):
+                return self.type_declaration()
+            # if self.match(TokenType.OP):
+            #     return self.op_declaration()
+            # if self.match(TokenType.CONSTRAINT):
+            #     return self.constraint_declaration()
+        except ParsingError:
+            self.synchronize()
+        return None
+
+    def type_declaration(self) -> TypeStmt:
+        """Parse a type declaration once declaration() has matched TYPE.
+
+        Expects a name, '<', comma-separated base type expressions (a trailing
+        comma is accepted) and '>'. The body is not parsed yet and is None.
+        """
+        name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
+        self.consume(TokenType.LESS, "Expected '<' after type name")
+        bases: list[TypeExpr] = []
+        while not self.check(TokenType.GREATER) and not self.is_at_end():
+            bases.append(self.type_expr())
+            if not self.check(TokenType.GREATER):
+                self.consume(TokenType.COMMA, "Expected ',' between type bases")
+        self.consume(TokenType.GREATER, "Expected '>' after base type")
+
+        body: Optional[TypeBodyExpr] = None
+        return TypeStmt(name=name, bases=bases, body=body)
+
+    def type_expr(self) -> TypeExpr:
+        """Parse a type name followed by PLUS-introduced constraints, if any."""
+        name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
+        constraints: list[ConstraintExpr] = []
+
+        while not self.is_at_end() and self.match(TokenType.PLUS):
+            constraints.append(self.constraint_expr())
+
+        return TypeExpr(name=name, constraints=constraints)
+
+    def constraint_expr(self) -> ConstraintExpr:
+        """Return an empty ConstraintExpr; nothing is parsed here yet."""
+        # TODO
+        return ConstraintExpr()