Merge pull request 'Syntax prototype and basic parser' (#1) from feat/syntax-prototype into main

Reviewed-on: #1
2026-05-19 08:34:07 +00:00
parent 2f839419f8 697f4d5003
commit 80bfcd0d1a
30 changed files with 2772 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,6 @@
 .vscode
 __pycache__
 .env
 venv
 .venv
 *.pyc
--- a/core/ast/annotations.py
+++ b/core/ast/annotations.py
@@ -0,0 +1,107 @@
 from __future__ import annotations
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Any, Generic, Optional, TypeVar
 from lexer.token import Token
 T = TypeVar("T")
@dataclass(frozen=True)
 class Stmt(ABC):
    """Abstract base class for statement nodes of the annotation AST."""
    # Visitor-pattern entry point: `AnnotationStmt` implements it by calling
    # the visitor's `visit_annotation_stmt`
    @abstractmethod
    def accept(self, visitor: Visitor[T]) -> T: ...
    class Visitor(ABC, Generic[T]):
        """Interface for visitors of annotation statements; T is the result type of a visit."""
        @abstractmethod
        def visit_annotation_stmt(self, stmt: AnnotationStmt) -> T: ...
@dataclass(frozen=True)
 class AnnotationStmt(Stmt):
    """Annotation statement: a name token followed by an optional schema."""
    name: Token
    # None when no schema is attached to the name
    schema: Optional[SchemaExpr]
    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_annotation_stmt` and return its result."""
        return visitor.visit_annotation_stmt(self)
@dataclass(frozen=True)
 class Expr(ABC):
    """Abstract base class for expression nodes of the annotation AST."""
    # Visitor-pattern entry point: each concrete expression calls the
    # `visit_*` method of the visitor that matches its own class
    @abstractmethod
    def accept(self, visitor: Visitor[T]) -> T: ...
    class Visitor(ABC, Generic[T]):
        """Interface for visitors of annotation expressions; T is the result type of a visit."""
        @abstractmethod
        def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ...
        @abstractmethod
        def visit_literal_expr(self, expr: LiteralExpr) -> T: ...
        @abstractmethod
        def visit_type_expr(self, expr: TypeExpr) -> T: ...
        @abstractmethod
        def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ...
        @abstractmethod
        def visit_schema_expr(self, expr: SchemaExpr) -> T: ...
        @abstractmethod
        def visit_schema_element_expr(self, expr: SchemaElementExpr) -> T: ...
@dataclass(frozen=True)
 class WildcardExpr(Expr):
    """Wildcard expression, holding the token it was built from."""
    token: Token
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_wildcard_expr` and return its result."""
        return visitor.visit_wildcard_expr(self)
@dataclass(frozen=True)
 class LiteralExpr(Expr):
    """Literal expression wrapping an arbitrary constant value."""
    value: Any
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_literal_expr` and return its result."""
        return visitor.visit_literal_expr(self)
@dataclass(frozen=True)
 class TypeExpr(Expr):
    """Type expression: a type name and the constraints attached to it."""
    name: Token
    # NOTE: `frozen=True` only prevents rebinding the attribute; the list itself
    # stays mutable and makes instances unhashable
    constraints: list[ConstraintExpr]
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_type_expr` and return its result."""
        return visitor.visit_type_expr(self)
@dataclass(frozen=True)
 class ConstraintExpr(Expr):
    """Binary constraint expression of the form `left op right`."""
    left: Expr
    # Operator token placed between the two operands
    op: Token
    right: Expr
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_constraint_expr` and return its result."""
        return visitor.visit_constraint_expr(self)
@dataclass(frozen=True)
 class SchemaExpr(Expr):
    """Schema expression: a list of elements enclosed between two delimiter tokens."""
    # Opening delimiter token
    left: Token
    elements: list[Expr]
    # Closing delimiter token
    right: Token
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_schema_expr` and return its result."""
        return visitor.visit_schema_expr(self)
@dataclass(frozen=True)
 class SchemaElementExpr(Expr):
    """Single element of a schema; both its name and its type are optional."""
    # None for an unnamed element
    name: Optional[Token]
    # None for an untyped element
    type: Optional[Expr]
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_schema_element_expr` and return its result."""
        return visitor.visit_schema_element_expr(self)
--- a/core/ast/midas.py
+++ b/core/ast/midas.py
@@ -0,0 +1,138 @@
 from __future__ import annotations
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Any, Generic, Optional, TypeVar
 from lexer.token import Token
 T = TypeVar("T")
 # Statements
@dataclass(frozen=True)
 class Stmt(ABC):
    """Abstract base class for statement nodes of the Midas AST."""
    # Visitor-pattern entry point: each concrete statement calls the
    # `visit_*` method of the visitor that matches its own class
    @abstractmethod
    def accept(self, visitor: Visitor[T]) -> T: ...
    class Visitor(ABC, Generic[T]):
        """Interface for visitors of Midas statements; T is the result type of a visit."""
        @abstractmethod
        def visit_type_stmt(self, stmt: TypeStmt) -> T: ...
        @abstractmethod
        def visit_property_stmt(self, stmt: PropertyStmt) -> T: ...
        @abstractmethod
        def visit_op_stmt(self, stmt: OpStmt) -> T: ...
        @abstractmethod
        def visit_constraint_stmt(self, stmt: ConstraintStmt) -> T: ...
@dataclass(frozen=True)
 class TypeStmt(Stmt):
    """Type definition statement: a name, its base types and an optional body."""
    name: Token
    bases: list[TypeExpr]
    # None when the definition has no body
    body: Optional[TypeBodyExpr]
    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_type_stmt` and return its result."""
        return visitor.visit_type_stmt(self)
@dataclass(frozen=True)
 class PropertyStmt(Stmt):
    """Property declaration: a property name and its type."""
    name: Token
    type: TypeExpr
    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_property_stmt` and return its result."""
        return visitor.visit_property_stmt(self)
@dataclass(frozen=True)
 class OpStmt(Stmt):
    """Operation definition: operand types, operator token and result type."""
    left: TypeExpr
    # Operator token placed between the two operand types
    op: Token
    right: TypeExpr
    # Type produced by the operation
    result: TypeExpr
    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_op_stmt` and return its result."""
        return visitor.visit_op_stmt(self)
@dataclass(frozen=True)
 class ConstraintStmt(Stmt):
    """Named constraint definition: a name bound to a constraint expression."""
    name: Token
    constraint: ConstraintExpr
    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_constraint_stmt` and return its result."""
        return visitor.visit_constraint_stmt(self)
 # Expressions
@dataclass(frozen=True)
 class Expr(ABC):
    """Abstract base class for expression nodes of the Midas AST."""
    # Visitor-pattern entry point: each concrete expression calls the
    # `visit_*` method of the visitor that matches its own class
    @abstractmethod
    def accept(self, visitor: Visitor[T]) -> T: ...
    class Visitor(ABC, Generic[T]):
        """Interface for visitors of Midas expressions; T is the result type of a visit."""
        @abstractmethod
        def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ...
        @abstractmethod
        def visit_literal_expr(self, expr: LiteralExpr) -> T: ...
        @abstractmethod
        def visit_type_expr(self, expr: TypeExpr) -> T: ...
        @abstractmethod
        def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ...
        @abstractmethod
        def visit_type_body_expr(self, expr: TypeBodyExpr) -> T: ...
@dataclass(frozen=True)
 class WildcardExpr(Expr):
    """Wildcard expression, holding the token it was built from."""
    token: Token
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_wildcard_expr` and return its result."""
        return visitor.visit_wildcard_expr(self)
@dataclass(frozen=True)
 class LiteralExpr(Expr):
    """Literal expression wrapping an arbitrary constant value."""
    value: Any
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_literal_expr` and return its result."""
        return visitor.visit_literal_expr(self)
@dataclass(frozen=True)
 class TypeExpr(Expr):
    """Type expression: a type name and the constraints attached to it."""
    name: Token
    # NOTE: `frozen=True` only prevents rebinding the attribute; the list itself
    # stays mutable and makes instances unhashable
    constraints: list[ConstraintExpr]
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_type_expr` and return its result."""
        return visitor.visit_type_expr(self)
@dataclass(frozen=True)
 class ConstraintExpr(Expr):
    """Binary constraint expression of the form `left op right`."""
    left: Expr
    # Operator token placed between the two operands
    op: Token
    right: Expr
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_constraint_expr` and return its result."""
        return visitor.visit_constraint_expr(self)
@dataclass(frozen=True)
 class TypeBodyExpr(Expr):
    """Body of a type definition: the list of its property declarations."""
    properties: list[PropertyStmt]
    def accept(self, visitor: Expr.Visitor[T]) -> T:
        """Dispatch to `visitor.visit_type_body_expr` and return its result."""
        return visitor.visit_type_body_expr(self)
--- a/core/ast/printer.py
+++ b/core/ast/printer.py
@@ -0,0 +1,360 @@
 from __future__ import annotations
 from contextlib import contextmanager
 from enum import Enum, auto
 import io
 from typing import Generator, Generic, Optional, Protocol, TypeVar
 import core.ast.annotations as a
 import core.ast.midas as m
 class _Level(Enum):
    """State of one nesting level of the tree drawn by `AstPrinter`."""
    # The last child of this level was already written: deeper lines get blank padding here
    EMPTY = auto()
    # More children may follow: drawn as a branch, or as a vertical bar for deeper lines
    ACTIVE = auto()
    # The next line written at this level is its last child
    LAST = auto()
 class Expr(Protocol):
    """Structural type of any node that an `AstPrinter` can be passed to through `accept`."""
    def accept(self, printer: AstPrinter) -> None: ...
 T = TypeVar("T", bound=Expr)
 class AstPrinter(Generic[T]):
    """Base class for printers that render an AST as an indented tree
    Subclasses implement the `visit_*` methods and emit lines through
    `_write_line`. Nesting is tracked as a stack of `_Level` states from
    which the box-drawing prefix of every line is derived
    """
    LAST_CHILD = "└── "
    CHILD = "├── "
    VERTICAL = "│   "
    EMPTY = "    "
    def __init__(self):
        # One entry per open nesting level, outermost first
        self._levels: list[_Level] = []
        # Pending list index, prepended as "[i] " to the next written line
        self._idx: Optional[int] = None
        self._buf: io.StringIO = io.StringIO()
    def print(self, expr: T) -> str:
        """Render the given node and its children as a tree
        Args:
            expr (T): the root node to print
        Returns:
            str: the rendered tree, one line per node or field
        """
        # Reset the whole traversal state, not only the buffer: a previous call
        # aborted by an exception could otherwise leak a pending index or open
        # levels into this output
        self._levels = []
        self._idx = None
        self._buf = io.StringIO()
        expr.accept(self)
        return self._buf.getvalue()
    @contextmanager
    def _child_level(self, last: bool = False) -> Generator[None, None, None]:
        """Open a nesting level for the duration of the `with` block
        Args:
            last (bool, optional): whether the first line written inside the level is also its last child. Defaults to False.
        """
        self._levels.append(_Level.LAST if last else _Level.ACTIVE)
        try:
            yield
        finally:
            # Always close the level, even if a visitor raised
            self._levels.pop()
    def _mark_last(self):
        """Flag the next line of the innermost level as its last child (no-op at the root)"""
        if self._levels:
            self._levels[-1] = _Level.LAST
    def _write_line(self, text: str, *, last: bool = False):
        """Write one line of the tree, prefixed with the current indentation
        Args:
            text (str): the content of the line
            last (bool, optional): whether the line is the last child of the innermost level. Defaults to False.
        """
        if last:
            self._mark_last()
        # Must be built before the text is altered: it consumes the LAST state
        indent: str = self._build_indent()
        if self._idx is not None:
            # The pending index applies to a single line only
            text = f"[{self._idx}] {text}"
            self._idx = None
        self._buf.write(indent + text + "\n")
    def _build_indent(self) -> str:
        """Build the box-drawing prefix of the next line
        This method has a side effect: a level in the LAST state is switched
        to EMPTY once its branch has been drawn
        Returns:
            str: the prefix, empty at the root
        """
        parts: list[str] = []
        # Outer levels only contribute padding or a continuation bar
        for level in self._levels[:-1]:
            parts.append(self.EMPTY if level == _Level.EMPTY else self.VERTICAL)
        # The innermost level draws the branch leading to the line itself
        if self._levels:
            if self._levels[-1] == _Level.LAST:
                parts.append(self.LAST_CHILD)
                self._levels[-1] = _Level.EMPTY
            else:
                parts.append(self.CHILD)
        return "".join(parts)
    def _write_optional_child(
        self, label: str, child: Optional[T], *, last: bool = False
    ):
        """Write a labelled child node, or `label: None` if it is missing
        Args:
            label (str): the name of the field holding the child
            child (Optional[T]): the child node, if any
            last (bool, optional): whether the field is the last child of the innermost level. Defaults to False.
        """
        if last:
            self._mark_last()
        if child is None:
            self._write_line(f"{label}: None")
        else:
            self._write_line(label)
            # The child is the only entry below its label
            with self._child_level(last=True):
                child.accept(self)
 class AnnotationAstPrinter(AstPrinter, a.Expr.Visitor[None], a.Stmt.Visitor[None]):
    """Tree printer for the annotation AST: one `visit_*` method per node class"""
    def _visit_numbered(self, nodes) -> None:
        """Visit the nodes in order, tagging each with its index and flagging the final one as last child"""
        final: int = len(nodes) - 1
        for position, node in enumerate(nodes):
            self._idx = position
            if position == final:
                self._mark_last()
            node.accept(self)
    def _visit_sole_child(self, node) -> None:
        """Visit a node as the one and only entry of a new nesting level"""
        with self._child_level(last=True):
            node.accept(self)
    def visit_annotation_stmt(self, stmt: a.AnnotationStmt) -> None:
        """Print the statement with its name and optional schema"""
        self._write_line("AnnotationStmt")
        with self._child_level():
            name_line: str = f'name: "{stmt.name.lexeme}"'
            self._write_line(name_line)
            self._write_optional_child("schema", stmt.schema, last=True)
    def visit_type_expr(self, expr: a.TypeExpr):
        """Print the type name followed by its numbered constraints"""
        self._write_line("TypeExpr")
        with self._child_level():
            name_line: str = f'name: "{expr.name.lexeme}"'
            self._write_line(name_line)
            self._write_line("constraints", last=True)
            with self._child_level():
                self._visit_numbered(expr.constraints)
    def visit_constraint_expr(self, expr: a.ConstraintExpr) -> None:
        """Print both operands and the operator of the constraint"""
        self._write_line("ConstraintExpr")
        with self._child_level():
            self._write_line("left")
            self._visit_sole_child(expr.left)
            self._write_line(f"operator: {expr.op.lexeme}")
            self._write_line("right", last=True)
            self._visit_sole_child(expr.right)
    def visit_schema_expr(self, expr: a.SchemaExpr):
        """Print the numbered elements of the schema"""
        self._write_line("SchemaExpr")
        with self._child_level():
            self._visit_numbered(expr.elements)
    def visit_schema_element_expr(self, expr: a.SchemaElementExpr):
        """Print the optional name and optional type of the element"""
        self._write_line("SchemaElementExpr")
        with self._child_level():
            if expr.name is None:
                shown_name: str = "None"
            else:
                shown_name = f'"{expr.name.lexeme}"'
            self._write_line(f"name: {shown_name}")
            self._write_optional_child("type", expr.type, last=True)
    def visit_wildcard_expr(self, expr: a.WildcardExpr) -> None:
        """Print the wildcard as a leaf"""
        self._write_line("WildcardExpr")
    def visit_literal_expr(self, expr: a.LiteralExpr) -> None:
        """Print the literal and its value"""
        self._write_line("LiteralExpr")
        with self._child_level():
            value_line: str = f"value: {expr.value}"
            self._write_line(value_line, last=True)
 class AnnotationPrinter(a.Expr.Visitor[str], a.Stmt.Visitor[str]):
    """Printer turning an annotation AST back into its source-like text form"""
    def print(self, expr: a.Expr | a.Stmt):
        """Render the given node as text"""
        return expr.accept(self)
    def visit_annotation_stmt(self, stmt: a.AnnotationStmt) -> str:
        """Render the name directly followed by the schema, if any"""
        suffix: str = "" if stmt.schema is None else stmt.schema.accept(self)
        return f"{stmt.name.lexeme}{suffix}"
    def visit_type_expr(self, expr: a.TypeExpr) -> str:
        """Render the type name and its parenthesised constraints, joined by ` + `"""
        wrapped = ("(" + item.accept(self) + ")" for item in expr.constraints)
        return " + ".join([expr.name.lexeme, *wrapped])
    def visit_constraint_expr(self, expr: a.ConstraintExpr) -> str:
        """Render the constraint as `left op right`"""
        return " ".join(
            (expr.left.accept(self), expr.op.lexeme, expr.right.accept(self))
        )
    def visit_schema_expr(self, expr: a.SchemaExpr) -> str:
        """Render the comma-separated elements between the two delimiters"""
        return "".join(
            [
                expr.left.lexeme,
                ", ".join(item.accept(self) for item in expr.elements),
                expr.right.lexeme,
            ]
        )
    def visit_schema_element_expr(self, expr: a.SchemaElementExpr) -> str:
        """Render the element as `name: type`, `type`, or `_` when untyped"""
        type_text: str = "_" if expr.type is None else expr.type.accept(self)
        if expr.name is None:
            pieces: list[str] = [type_text]
        else:
            pieces = [expr.name.lexeme, type_text]
        return ": ".join(pieces)
    def visit_wildcard_expr(self, expr: a.WildcardExpr) -> str:
        """Render the wildcard"""
        return "_"
    def visit_literal_expr(self, expr: a.LiteralExpr) -> str:
        """Render the string form of the literal value"""
        return str(expr.value)
 class MidasAstPrinter(AstPrinter, m.Expr.Visitor[None], m.Stmt.Visitor[None]):
    """Tree printer for the Midas AST: one `visit_*` method per node class"""
    def _visit_numbered(self, nodes) -> None:
        """Visit the nodes in order, tagging each with its index and flagging the final one as last child"""
        final: int = len(nodes) - 1
        for position, node in enumerate(nodes):
            self._idx = position
            if position == final:
                self._mark_last()
            node.accept(self)
    def _visit_sole_child(self, node) -> None:
        """Visit a node as the one and only entry of a new nesting level"""
        with self._child_level(last=True):
            node.accept(self)
    def visit_type_stmt(self, stmt: m.TypeStmt):
        """Print the type name, its numbered bases and its optional body"""
        self._write_line("TypeStmt")
        with self._child_level():
            name_line: str = f'name: "{stmt.name.lexeme}"'
            self._write_line(name_line)
            self._write_line("bases")
            with self._child_level():
                self._visit_numbered(stmt.bases)
            self._write_optional_child("body", stmt.body, last=True)
    def visit_property_stmt(self, stmt: m.PropertyStmt):
        """Print the property name and its type"""
        self._write_line("PropertyStmt")
        with self._child_level():
            name_line: str = f'name: "{stmt.name.lexeme}"'
            self._write_line(name_line)
            self._write_line("type", last=True)
            self._visit_sole_child(stmt.type)
    def visit_op_stmt(self, stmt: m.OpStmt) -> None:
        """Print both operand types, the operator and the result type"""
        self._write_line("OpStmt")
        with self._child_level():
            self._write_line("left")
            self._visit_sole_child(stmt.left)
            self._write_line(f'op: "{stmt.op.lexeme}"')
            self._write_line("right")
            self._visit_sole_child(stmt.right)
            self._write_line("result", last=True)
            self._visit_sole_child(stmt.result)
    def visit_constraint_stmt(self, stmt: m.ConstraintStmt):
        """Print the constraint name and its expression"""
        self._write_line("ConstraintStmt")
        with self._child_level():
            name_line: str = f'name: "{stmt.name.lexeme}"'
            self._write_line(name_line)
            self._write_line("constraint", last=True)
            self._visit_sole_child(stmt.constraint)
    def visit_type_expr(self, expr: m.TypeExpr):
        """Print the type name followed by its numbered constraints"""
        self._write_line("TypeExpr")
        with self._child_level():
            name_line: str = f'name: "{expr.name.lexeme}"'
            self._write_line(name_line)
            self._write_line("constraints", last=True)
            with self._child_level():
                self._visit_numbered(expr.constraints)
    def visit_constraint_expr(self, expr: m.ConstraintExpr):
        """Print both operands and the operator of the constraint"""
        self._write_line("ConstraintExpr")
        with self._child_level():
            self._write_line("left")
            self._visit_sole_child(expr.left)
            self._write_line(f"operator: {expr.op.lexeme}")
            self._write_line("right", last=True)
            self._visit_sole_child(expr.right)
    def visit_type_body_expr(self, expr: m.TypeBodyExpr):
        """Print the numbered properties of the body"""
        self._write_line("TypeBodyExpr")
        with self._child_level():
            self._write_line("properties", last=True)
            with self._child_level():
                self._visit_numbered(expr.properties)
    def visit_wildcard_expr(self, expr: m.WildcardExpr) -> None:
        """Print the wildcard as a leaf"""
        self._write_line("WildcardExpr")
    def visit_literal_expr(self, expr: m.LiteralExpr) -> None:
        """Print the literal and its value"""
        self._write_line("LiteralExpr")
        with self._child_level():
            value_line: str = f"value: {expr.value}"
            self._write_line(value_line, last=True)
 class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]):
    """Printer turning a Midas AST back into its source-like text form"""
    def __init__(self, indent: int = 4):
        """Create a printer indenting nested lines by `indent` spaces per level"""
        self.indent: int = indent
        self.level: int = 0
    def indented(self, text: str) -> str:
        """Prefix the text with the padding of the current nesting level"""
        padding: str = " " * (self.level * self.indent)
        return padding + text
    def print(self, expr: m.Expr | m.Stmt):
        """Render the given node as text, starting at nesting level 0"""
        self.level = 0
        return expr.accept(self)
    def visit_type_stmt(self, stmt: m.TypeStmt):
        """Render `type Name<bases>`, followed by a braced body if there is one"""
        base_list: str = ", ".join([base.accept(self) for base in stmt.bases])
        header: str = self.indented(f"type {stmt.name.lexeme}<{base_list}>")
        if stmt.body is None:
            return header
        # The body is rendered one level deeper than the statement itself
        self.level += 1
        body: str = stmt.body.accept(self)
        self.level -= 1
        return header + " {\n" + body + "\n" + self.indented("}")
    def visit_property_stmt(self, stmt: m.PropertyStmt):
        """Render `name: type`, without any indentation"""
        rendered_type = stmt.type.accept(self)
        return f"{stmt.name.lexeme}: {rendered_type}"
    def visit_op_stmt(self, stmt: m.OpStmt):
        """Render `op <left> operator <right> = <result>`"""
        lhs: str = stmt.left.accept(self)
        symbol: str = stmt.op.lexeme
        rhs: str = stmt.right.accept(self)
        outcome: str = stmt.result.accept(self)
        return self.indented(f"op <{lhs}> {symbol} <{rhs}> = <{outcome}>")
    def visit_constraint_stmt(self, stmt: m.ConstraintStmt):
        """Render `constraint name = expression`"""
        label: str = stmt.name.lexeme
        rendered: str = stmt.constraint.accept(self)
        return self.indented(f"constraint {label} = {rendered}")
    def visit_type_expr(self, expr: m.TypeExpr):
        """Render the type name and its parenthesised constraints, joined by ` + `"""
        wrapped = ("(" + item.accept(self) + ")" for item in expr.constraints)
        return " + ".join([expr.name.lexeme, *wrapped])
    def visit_constraint_expr(self, expr: m.ConstraintExpr):
        """Render the constraint as `left op right`"""
        return " ".join(
            (expr.left.accept(self), expr.op.lexeme, expr.right.accept(self))
        )
    def visit_type_body_expr(self, expr: m.TypeBodyExpr):
        """Render one indented line per property"""
        return "\n".join(
            [self.indented(item.accept(self)) for item in expr.properties]
        )
    def visit_wildcard_expr(self, expr: m.WildcardExpr):
        """Render the wildcard"""
        return "_"
    def visit_literal_expr(self, expr: m.LiteralExpr):
        """Render the string form of the literal value"""
        return str(expr.value)
--- a/examples/00_syntax_prototype/01_simple_types.py
+++ b/examples/00_syntax_prototype/01_simple_types.py
@@ -0,0 +1,16 @@
 # type: ignore
 # ruff: disable[F821]
 from __future__ import annotations
 # A simple data-frame with several columns of various simple types
 # Each column can be named and/or typed
 df: Frame[
    verified: bool,
    birth_year: int,
    height: float + ( _ > 0 ) + ( _ < 250 ),  # typed, with two inline constraints
    name: str,
    date: datetime,
    float,  # unnamed
    unknown: _,  # untyped
    _  # unnamed and untyped
 ]
--- a/examples/00_syntax_prototype/02_custom_types.midas
+++ b/examples/00_syntax_prototype/02_custom_types.midas
@@ -0,0 +1,24 @@
 // Simple custom type derived from floats
 type Latitude<float>
 type Longitude<float>
 // Complex custom type, containing two values accessible through properties
 type GeoLocation<Latitude, Longitude> {
    lat: Latitude
    lon: Longitude
 }
 type LatitudeDiff<float>
 type LongitudeDiff<float>
 // Simple operation defined on our custom types
 op <Latitude> - <Latitude> = <LatitudeDiff>
 op <Longitude> - <Longitude> = <LongitudeDiff>
 // Simple custom type with two inline constraints (0 <= value < 150)
 type Age<int + (0 <= _) + (_ < 150)>
 // Predefined custom constraints that can be referenced in other definitions
 constraint Positive = _ >= 0
 constraint StrictlyPositive = _ > 0
 //constraint Even = _ % 2 == 0
--- a/examples/00_syntax_prototype/02_custom_types.py
+++ b/examples/00_syntax_prototype/02_custom_types.py
@@ -0,0 +1,34 @@
 # type: ignore
 # ruff: disable[F821]
 from __future__ import annotations
 # Prototype of custom type import to use valid Python syntax
 import midas
 midas.using("02_custom_types.midas")
 # A data-frame using a custom type
 df: Frame[
    location: GeoLocation
 ]
 # Properties of a type can be used on a column of that type
 # The resulting column holds the type declared for the property in the .midas file
 lat: Column[Latitude] = df["location"].lat
 lon: Column[Longitude] = df["location"].lon
 # Unregistered operations between types are not permitted
 lat + lon  # Invalid operation
 # Registered operations are permitted
 lat1: Latitude = lat[0]
 lat2: Latitude = lat[1]
 lat_diff: LatitudeDiff = lat2 - lat1  # Valid operation
 # In addition to the type, a column can have one or more constraints, either defined inline or in a separate file
 df2: Frame[
    age: int + (_ >= 0),
    height: float + (_ >= 0),
 ]
 # Same frame, using the `Positive` constraint defined in the .midas file
 df2_bis: Frame[
    age: int + Positive,
    height: float + Positive,
 ]
--- a/lexer/init.py
+++ b/lexer/init.py
--- a/lexer/annotations.py
+++ b/lexer/annotations.py
@@ -0,0 +1,102 @@
 from lexer.base import Lexer
 from lexer.keyword import ANNOTATION_KEYWORDS
 from lexer.token import TokenType
 class AnnotationLexer(Lexer):
    def scan_token(self) -> None:
        char: str = self.advance()
        match char:
            case "(":
                self.add_token(TokenType.LEFT_PAREN)
            case ")":
                self.add_token(TokenType.RIGHT_PAREN)
            case "[":
                self.add_token(TokenType.LEFT_BRACKET)
            case "]":
                self.add_token(TokenType.RIGHT_BRACKET)
            case "<":
                self.add_token(
                    TokenType.LESS_EQUAL if self.match("=") else TokenType.LESS
                )
            case ">":
                self.add_token(
                    TokenType.GREATER_EQUAL if self.match("=") else TokenType.GREATER
                )
            case "=":
                self.add_token(
                    TokenType.EQUAL_EQUAL if self.match("=") else TokenType.EQUAL
                )
            case "!":
                if self.match("="):
                    self.add_token(TokenType.BANG_EQUAL)
                else:
                    self.error("Unexpected single bang. Did you mean '!=' ?")
            case ":":
                self.add_token(TokenType.COLON)
            case ",":
                self.add_token(TokenType.COMMA)
            case "_":
                self.add_token(TokenType.UNDERSCORE)
            case "+":
                self.add_token(TokenType.PLUS)
            case "#":
                self.scan_comment()
            case "\n":
                self.add_token(TokenType.NEWLINE)
            case " " | "\r" | "\t":
                # Consume all whitespace characters until EOL or EOF
                while (
                    self.peek().isspace()
                    and self.peek() != "\n"
                    and not self.is_at_end()
                ):
                    self.advance()
                self.add_token(TokenType.WHITESPACE)
            case _:
                if char.isdigit():
                    self.scan_number()
                elif char.isalpha():
                    self.scan_identifier()
                else:
                    self.error("Unexpected character")
        return None
    def scan_number(self):
        """Scan the rest of number and add it as a token
        This method handles both simple integers and floats. Scientific notation
        and base prefixes (0x, 0b, 0o) are not supported
        """
        while self.peek().isdigit():
            self.advance()
        if self.peek() == "." and self.peek_next().isdigit():
            self.advance()
            while self.peek().isdigit():
                self.advance()
        value: float = float(self.source[self.start : self.idx])
        self.add_token(TokenType.NUMBER, value)
    def scan_identifier(self):
        """Scan the rest of an identifier and add it as a token
        An identifier starts with a letter, followed by any number of
        alphanumerical characters or underscores
        """
        while self.peek().isalnum() or self.peek() == "_":
            self.advance()
        lexeme: str = self.source[self.start : self.idx]
        token_type: TokenType = ANNOTATION_KEYWORDS.get(lexeme, TokenType.IDENTIFIER)
        self.add_token(token_type)
    def scan_comment(self):
        """Scan the rest of a comment and add it as a token
        A comment starts with a `#` character and ends at the EOL/EOF
        """
        while self.peek() != "\n" and not self.is_at_end():
            self.advance()
        self.add_token(TokenType.COMMENT)
--- a/lexer/base.py
+++ b/lexer/base.py
@@ -0,0 +1,166 @@
 from abc import ABC, abstractmethod
 from typing import Any, Callable, Optional
 from lexer.position import Position
 from lexer.token import Token, TokenType
 class Lexer(ABC):
    """An abstract lexer which provides methods to easily extend it into a concrete one
    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)
    [1]: https://craftinginterpreters.com/
    """
    def __init__(self, source: str, file: Optional[str] = None) -> None:
        """Create a new lexer to scan for tokens in the given source
        Args:
            source (str): the source to scan
            file (Optional[str], optional): the path of the given source. Can be a file path or any string identifier. Defaults to None.
        """
        self.source: str = source
        self.file: Optional[str] = file
        self.tokens: list[Token] = []
        # Index of the first character of the lexeme being scanned
        self.start: int = 0
        # Index of the next character to consume
        self.idx: int = 0
        self.length: int = len(self.source)
        # Human-readable position of the cursor, both 1-based
        self.line: int = 1
        self.column: int = 1
        self.start_pos: Position = self.get_position()
    def error(self, msg: str):
        """Raise a syntax error located at the start of the current lexeme
        Args:
            msg (str): the error message
        Raises:
            SyntaxError
        """
        raise SyntaxError(f"[ERROR] Error at {self.start_pos}: {msg}")
    def process(self) -> list[Token]:
        """Scan tokens out of the source text
        Returns:
            list[Token]: all the tokens that could be scanned, followed by an EOF token
        Raises:
            SyntaxError: if a syntax error is found
        """
        self.scan_tokens()
        self.tokens.append(Token(TokenType.EOF, "", None, self.get_position()))
        return self.tokens
    def is_at_end(self) -> bool:
        """Whether the lexer is at the end of the source
        Returns:
            bool: True if the current index is at the end of the source
        """
        return self.idx >= self.length
    def get_position(self) -> Position:
        """Get the current position
        Returns:
            Position: the current position
        """
        return Position(file=self.file, line=self.line, column=self.column)
    def peek(self) -> str:
        """Get the current character without advancing, if any
        Returns:
            str: the current character, or an empty string if at EOF
        """
        if self.idx < self.length:
            return self.source[self.idx]
        return ""
    def peek_next(self) -> str:
        """Get the next character without advancing, if any
        Returns:
            str: the next character, or an empty string if at EOF
        """
        if self.idx + 1 < self.length:
            return self.source[self.idx + 1]
        return ""
    def advance(self) -> str:
        """Consume the current character and move the cursor past it
        At EOF nothing is consumed and the cursor is left untouched, so that
        the reported position never runs past the end of the source
        Returns:
            str: the consumed character, or an empty string if at EOF
        """
        if self.is_at_end():
            return ""
        char: str = self.source[self.idx]
        self.idx += 1
        self.column += 1
        if char == "\n":
            # Overrides the column increment above
            self.newline()
        return char
    def newline(self):
        """Update the current position after encountering a newline character"""
        self.line += 1
        self.column = 1
    def match(self, expected: str) -> bool:
        """Consume the current character if it matches the given value
        Args:
            expected (str): the expected character
        Returns:
            bool: whether a character was matched and consumed
        """
        if self.peek() == expected:
            self.advance()
            return True
        return False
    def update_start(self):
        """Update the starting position of the current lexeme
        The cursor marking the start of the lexeme currently being scanned is
        moved to the current position
        """
        self.start_pos = self.get_position()
        self.start = self.idx
    def add_token(self, token_type: TokenType, value: Optional[Any] = None):
        """Add the current lexeme to the list of scanned tokens
        The lexeme spans from the start cursor up to (excluding) the current index
        Args:
            token_type (TokenType): the type of token to add
            value (Optional[Any], optional): the value of the token (useful for numbers or constants). Defaults to None.
        """
        lexeme: str = self.source[self.start : self.idx]
        self.tokens.append(
            Token(position=self.start_pos, type=token_type, lexeme=lexeme, value=value)
        )
    def scan_tokens(self, condition: Optional[Callable[[], bool]] = None):
        """Scan tokens until EOF is reached or the given condition becomes False
        Args:
            condition (Optional[Callable[[], bool]], optional): the condition to continue scanning tokens.
                If None, defaults to always being True, effectively scanning tokens until EOF is reached. Defaults to None.
        """
        # The condition is checked before every token, ahead of the EOF test
        while (condition is None or condition()) and not self.is_at_end():
            self.update_start()
            self.scan_token()
    @abstractmethod
    def scan_token(self) -> None:
        """Scan a token
        This function should (at least) consume the current character and produce the appropriate token(s), using `add_token`
        """
        pass
--- a/lexer/keyword.py
+++ b/lexer/keyword.py
@@ -0,0 +1,16 @@
 from lexer.token import TokenType
 # Reserved words of the annotation syntax, mapped to their token type
 # `AnnotationLexer.scan_identifier` looks lexemes up here and falls back to
 # `TokenType.IDENTIFIER` for anything else; the lookup is case-sensitive
 ANNOTATION_KEYWORDS: dict[str, TokenType] = {
    "True": TokenType.TRUE,
    "False": TokenType.FALSE,
    "None": TokenType.NONE,
 }
 # Reserved words of the Midas definition syntax, mapped to their token type
 # Unlike the annotation keywords, the constants are spelled in lower case here
 MIDAS_KEYWORDS: dict[str, TokenType] = {
    "type": TokenType.TYPE,
    "op": TokenType.OP,
    "constraint": TokenType.CONSTRAINT,
    "true": TokenType.TRUE,
    "false": TokenType.FALSE,
    "none": TokenType.NONE,
 }
--- a/lexer/midas.py
+++ b/lexer/midas.py
@@ -0,0 +1,131 @@
 from lexer.base import Lexer
 from lexer.keyword import MIDAS_KEYWORDS
 from lexer.token import TokenType
 class MidasLexer(Lexer):
    def scan_token(self) -> None:
        char: str = self.advance()
        match char:
            case "(":
                self.add_token(TokenType.LEFT_PAREN)
            case ")":
                self.add_token(TokenType.RIGHT_PAREN)
            case "[":
                self.add_token(TokenType.LEFT_BRACKET)
            case "]":
                self.add_token(TokenType.RIGHT_BRACKET)
            case "{":
                self.add_token(TokenType.LEFT_BRACE)
            case "}":
                self.add_token(TokenType.RIGHT_BRACE)
            case "<":
                self.add_token(
                    TokenType.LESS_EQUAL if self.match("=") else TokenType.LESS
                )
            case ">":
                self.add_token(
                    TokenType.GREATER_EQUAL if self.match("=") else TokenType.GREATER
                )
            case "=":
                self.add_token(
                    TokenType.EQUAL_EQUAL if self.match("=") else TokenType.EQUAL
                )
            case "!":
                if self.match("="):
                    self.add_token(TokenType.BANG_EQUAL)
                else:
                    self.error("Unexpected single bang. Did you mean '!=' ?")
            case ":":
                self.add_token(TokenType.COLON)
            case ",":
                self.add_token(TokenType.COMMA)
            case "_":
                self.add_token(TokenType.UNDERSCORE)
            case "+":
                self.add_token(TokenType.PLUS)
            case "-":
                self.add_token(TokenType.MINUS)
            case "*":
                self.add_token(TokenType.STAR)
            case "/":
                if self.match("/"):
                    self.scan_comment()
                elif self.match("*"):
                    self.scan_comment_multiline()
                else:
                    self.add_token(TokenType.SLASH)
            case "\n":
                self.add_token(TokenType.NEWLINE)
            case " " | "\r" | "\t":
                # Consume all whitespace characters until EOL or EOF
                while (
                    self.peek().isspace()
                    and self.peek() != "\n"
                    and not self.is_at_end()
                ):
                    self.advance()
                self.add_token(TokenType.WHITESPACE)
            case _:
                if char.isdigit():
                    self.scan_number()
                elif char.isalpha():
                    self.scan_identifier()
                else:
                    self.error("Unexpected character")
        return None
    def scan_number(self):
        """Scan the rest of number and add it as a token
        This method handles both simple integers and floats. Scientific notation
        and base prefixes (0x, 0b, 0o) are not supported
        """
        while self.peek().isdigit():
            self.advance()
        if self.peek() == "." and self.peek_next().isdigit():
            self.advance()
            while self.peek().isdigit():
                self.advance()
        value: float = float(self.source[self.start : self.idx])
        self.add_token(TokenType.NUMBER, value)
    def scan_identifier(self):
        """Scan the rest of an identifier and add it as a token
        An identifier starts with a letter, followed by any number of
        alphanumerical characters or underscores
        """
        while self.peek().isalnum() or self.peek() == "_":
            self.advance()
        lexeme: str = self.source[self.start : self.idx]
        token_type: TokenType = MIDAS_KEYWORDS.get(lexeme, TokenType.IDENTIFIER)
        self.add_token(token_type)
    def scan_comment(self):
        """Scan the rest of a comment and add it as a token
        A comment starts with `//` and ends at the EOL/EOF
        """
        while self.peek() != "\n" and not self.is_at_end():
            self.advance()
        self.add_token(TokenType.COMMENT)
    def scan_comment_multiline(self):
        """Scan the rest of a multiline comment and add it as a token
        A multiline comment starts with `/*` and ends with `*/` or at the EOF
        """
        while (
            not (self.peek() == "*" and self.peek_next() == "/")
            and not self.is_at_end()
        ):
            self.advance()
        if not self.is_at_end():
            self.advance()
        if not self.is_at_end():
            self.advance()
        self.add_token(TokenType.COMMENT)
--- a/lexer/position.py
+++ b/lexer/position.py
@@ -0,0 +1,13 @@
 from dataclasses import dataclass
 from typing import Optional
@dataclass(frozen=True)
class Position:
    """Location of a token: an optional file name plus a line and a column"""

    file: Optional[str]
    line: int
    column: int

    def __repr__(self):
        # A missing (or empty) file name is simply left out of the output
        prefix = self.file if self.file else ""
        return "{}L{}:{}".format(prefix, self.line, self.column)
--- a/lexer/token.py
+++ b/lexer/token.py
@@ -0,0 +1,59 @@
 from dataclasses import dataclass
 from enum import Enum, auto
 from typing import Any
 from lexer.position import Position
 class TokenType(Enum):
    """All the kinds of tokens shared by the lexers and parsers

    Values are assigned with `auto()`, so they depend on the declaration order
    """
    # Punctuation
    LEFT_PAREN = auto()
    RIGHT_PAREN = auto()
    LEFT_BRACKET = auto()
    RIGHT_BRACKET = auto()
    LEFT_BRACE = auto()
    RIGHT_BRACE = auto()
    COLON = auto()
    COMMA = auto()
    UNDERSCORE = auto()
    # Operators (one or two characters)
    PLUS = auto()
    MINUS = auto()
    STAR = auto()
    SLASH = auto()
    GREATER = auto()
    GREATER_EQUAL = auto()
    LESS = auto()
    LESS_EQUAL = auto()
    EQUAL = auto()
    EQUAL_EQUAL = auto()
    BANG_EQUAL = auto()
    # Literals (TRUE, FALSE and NONE come from the keyword tables)
    IDENTIFIER = auto()
    NUMBER = auto()
    TRUE = auto()
    FALSE = auto()
    NONE = auto()
    # Keywords of the Midas definition syntax
    TYPE = auto()
    OP = auto()
    CONSTRAINT = auto()
    # Misc (COMMENT, WHITESPACE and NEWLINE are listed in `Parser.IGNORE`;
    # EOF marks the end of the token list for the parsers)
    COMMENT = auto()
    WHITESPACE = auto()
    EOF = auto()
    NEWLINE = auto()
@dataclass(frozen=True)
 class Token:
    """A scanned token"""
    # The kind of token
    type: TokenType
    # The exact slice of source text the token was scanned from
    lexeme: str
    # The value carried by the token (e.g. the float of a NUMBER token), or None
    value: Any
    # Where the lexeme starts in the source
    position: Position
--- a/parser/annotations.py
+++ b/parser/annotations.py
@@ -0,0 +1,152 @@
 from typing import Optional
 from core.ast.annotations import (
    AnnotationStmt,
    ConstraintExpr,
    Expr,
    LiteralExpr,
    SchemaElementExpr,
    SchemaExpr,
    Stmt,
    TypeExpr,
    WildcardExpr,
 )
 from lexer.token import Token, TokenType
 from parser.base import Parser
 from parser.errors import ParsingError
 class AnnotationParser(Parser):
    """Recursive-descent parser for custom type annotations (`Type` or `Type[Schema]`)"""

    # No token type acts as a recovery point: after an error, `synchronize`
    # skips everything up to the EOF token
    SYNC_BOUNDARY: set[TokenType] = set()

    def parse(self) -> Optional[Stmt]:
        """Parse a single annotation

        Tokens left over after the annotation are recorded as an error but do
        not discard the parsed statement

        Returns:
            Optional[Stmt]: the parsed annotation, or None if a parsing error was raised
        """
        result: Optional[Stmt]
        try:
            result = self.annotation()
        except ParsingError:
            result = None
            self.synchronize()
        if not self.is_at_end():
            self.error(self.peek(), "Extra tokens")
        return result

    def synchronize(self):
        """Skip tokens until a synchronization boundary (or EOF) is reached

        The token on which the error occurred is always skipped first
        """
        self.advance()
        while not (self.is_at_end() or self.peek().type in self.SYNC_BOUNDARY):
            self.advance()

    def annotation(self) -> AnnotationStmt:
        """Parse an annotation, written as `Type` or `Type[Schema]`

        Returns:
            AnnotationStmt: the parsed annotation statement
        """
        type_name: Token = self.consume(
            TokenType.IDENTIFIER, "Expected type identifier"
        )
        if not self.match(TokenType.LEFT_BRACKET):
            return AnnotationStmt(name=type_name, schema=None)
        return AnnotationStmt(name=type_name, schema=self.schema())

    def type_expr(self) -> TypeExpr:
        """Parse a type name followed by any number of `+ (constraint)` suffixes

        Returns:
            TypeExpr: the parsed type expression
        """
        base: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
        found: list[ConstraintExpr] = []
        while True:
            if self.is_at_end() or not self.match(TokenType.PLUS):
                break
            self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint")
            found.append(self.constraint_expr())
            self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint")
        return TypeExpr(name=base, constraints=found)

    def constraint_expr(self) -> ConstraintExpr:
        """Parse a type constraint of the form `value operator value`

        Returns:
            ConstraintExpr: the parsed type constraint expression
        """
        lhs: Expr = self.constraint_value()
        operator: Token = self.constraint_operator()
        rhs: Expr = self.constraint_value()
        return ConstraintExpr(left=lhs, op=operator, right=rhs)

    def constraint_value(self) -> Expr:
        """Parse a constraint operand: the wildcard `_` or a literal

        Returns:
            Expr: a wildcard expression or a literal expression
        """
        if not self.match(TokenType.UNDERSCORE):
            return self.literal()
        return WildcardExpr(self.previous())

    def literal(self) -> LiteralExpr:
        """Parse a boolean, none or number literal

        Returns:
            LiteralExpr: the parsed literal expression
        """
        constants = {
            TokenType.FALSE: False,
            TokenType.TRUE: True,
            TokenType.NONE: None,
        }
        if self.match(*constants):
            return LiteralExpr(constants[self.previous().type])
        if self.match(TokenType.NUMBER):
            return LiteralExpr(self.previous().value)
        raise self.error(self.peek(), "Expected literal")

    def constraint_operator(self) -> Token:
        """Parse a comparison operator

        Returns:
            Token: the consumed operator token
        """
        comparison_ops = (
            TokenType.LESS,
            TokenType.LESS_EQUAL,
            TokenType.GREATER,
            TokenType.GREATER_EQUAL,
            TokenType.EQUAL_EQUAL,
            TokenType.BANG_EQUAL,
        )
        if not self.match(*comparison_ops):
            raise self.error(self.peek(), "Expected constraint operator")
        return self.previous()

    def schema(self) -> SchemaExpr:
        """Parse a schema definition: a comma separated list of schema elements

        The opening bracket must already have been consumed; the closing one
        is consumed here

        Returns:
            SchemaExpr: the parsed schema expression
        """
        opening: Token = self.previous()
        items: list[Expr] = []
        while not (self.check(TokenType.RIGHT_BRACKET) or self.is_at_end()):
            items.append(self.schema_element())
            if self.check(TokenType.RIGHT_BRACKET):
                continue
            self.consume(TokenType.COMMA, "Expected ',' between schema elements")
        closing: Token = self.consume(TokenType.RIGHT_BRACKET, "Unclosed schema")
        return SchemaExpr(left=opening, elements=items, right=closing)

    def schema_element(self) -> SchemaElementExpr:
        """Parse a schema element

        One of: an anonymous element (`_`), a type, an untyped named column
        (`name: _`) or a named column (`name: Type`)

        Returns:
            SchemaElementExpr: the parsed schema element expression
        """
        if self.match(TokenType.UNDERSCORE):
            return SchemaElementExpr(name=None, type=None)
        if not self.check(TokenType.IDENTIFIER):
            raise self.error(self.peek(), "Expected schema element")
        column_name: Optional[Token] = None
        # An identifier followed by a colon is a column name, not a type
        if self.check_next(TokenType.COLON):
            column_name = self.advance()
            self.advance()  # skip the colon
        column_type: Optional[TypeExpr] = (
            None if self.match(TokenType.UNDERSCORE) else self.type_expr()
        )
        return SchemaElementExpr(name=column_name, type=column_type)
--- a/parser/base.py
+++ b/parser/base.py
@@ -0,0 +1,183 @@
 from abc import ABC, abstractmethod
 from dataclasses import dataclass
 from typing import Generic, TypeVar
 from lexer.token import Token, TokenType
 from parser.errors import ParsingError
@dataclass(frozen=True)
class TokenError:
    """A parsing error attached to the token at which it was detected"""

    token: Token
    message: str

    def get_report(self) -> str:
        """Build the full, human readable error message

        The message contains the token position, the offending lexeme
        (or "end" for the EOF token) and the error description

        Returns:
            str: the complete error message
        """
        if self.token.type == TokenType.EOF:
            location = "end"
        else:
            location = f"'{self.token.lexeme}'"
        return f"({self.token.position}) Error at {location}: {self.message}"
 T = TypeVar("T")
 class Parser(ABC, Generic[T]):
    """An abstract parser which provides methods to easily extend it into a concrete one

    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)

    [1]: https://craftinginterpreters.com/
    """

    # Token types dropped from the token list before parsing
    IGNORE: set[TokenType] = {
        TokenType.WHITESPACE,
        TokenType.COMMENT,
        TokenType.NEWLINE,
    }

    def __init__(self, tokens: list[Token]) -> None:
        """Create a new parser for the given tokens

        Tokens whose type is listed in `IGNORE` are filtered out

        Args:
            tokens (list[Token]): the tokens to parse
        """
        self.tokens: list[Token] = [
            token for token in tokens if token.type not in self.IGNORE
        ]
        self.current: int = 0
        self.length: int = len(self.tokens)
        self.errors: list[TokenError] = []

    def error(self, token: Token, message: str) -> ParsingError:
        """Record an error and build the matching exception

        The exception is returned, not raised: callers decide whether to raise it

        Args:
            token (Token): the token at which the error was detected
            message (str): a message explaining the error

        Returns:
            ParsingError: the parsing error to raise
        """
        report = TokenError(token=token, message=message)
        self.errors.append(report)
        return ParsingError()

    @abstractmethod
    def parse(self) -> T:
        """Parse the tokens

        Returns:
            T: the parsed element(s)
        """

    def is_at_end(self) -> bool:
        """Whether the parser has reached the EOF token

        Returns:
            bool: True if the current token is the EOF token
        """
        current_token: Token = self.peek()
        return current_token.type == TokenType.EOF

    def peek(self) -> Token:
        """Get the current token without advancing

        Returns:
            Token: the current token
        """
        return self.tokens[self.current]

    def previous(self) -> Token:
        """Get the token just before the current one

        This function is unsafe: at the beginning of the token list the index
        is -1, which yields the last token of the list instead of failing
        (an IndexError is only raised if the list is empty)

        Returns:
            Token: the previous token
        """
        return self.tokens[self.current - 1]

    def check(self, token_type: TokenType) -> bool:
        """Check whether the current token is of the given type

        This function always returns False if the parser is at the EOF token

        Args:
            token_type (TokenType): the type of token to check

        Returns:
            bool: True if the current token is of the given type and not EOF
        """
        return (not self.is_at_end()) and self.peek().type == token_type

    def check_next(self, token_type: TokenType) -> bool:
        """Check whether the token after the current one is of the given type

        This function always returns False if the current or the next token
        is the EOF token, or if there is no next token

        Args:
            token_type (TokenType): the type of token to check

        Returns:
            bool: True if the next token is of the given type and not EOF
        """
        if self.is_at_end():
            return False
        following: int = self.current + 1
        if following >= self.length:
            return False
        upcoming: Token = self.tokens[following]
        return upcoming.type != TokenType.EOF and upcoming.type == token_type

    def advance(self) -> Token:
        """Consume and return the current token, if not at the EOF

        At the EOF token the parser does not move and the previous token is
        returned instead

        Returns:
            Token: the current token, before advancing
        """
        if self.is_at_end():
            return self.previous()
        self.current += 1
        return self.previous()

    def match(self, *types: TokenType) -> bool:
        """Consume the current token if it matches one of the given types

        Returns:
            bool: whether a token was matched and consumed
        """
        if any(self.check(candidate) for candidate in types):
            self.advance()
            return True
        return False

    def consume(self, token_type: TokenType, error_msg: str) -> Token:
        """Consume the current token if it matches the given type or raise an error

        If the current token doesn't match the given type, the error is
        recorded with the provided message and raised

        Args:
            token_type (TokenType): the expected token type
            error_msg (str): the error message if the token doesn't match

        Raises:
            ParsingError: if the current token doesn't match the given type

        Returns:
            Token: the current token which matched the given type
        """
        if not self.check(token_type):
            raise self.error(self.peek(), error_msg)
        return self.advance()
--- a/parser/errors.py
+++ b/parser/errors.py
@@ -0,0 +1,2 @@
 class ParsingError(RuntimeError):
    """Raised by the parsers to abort the current rule after a syntax error"""
--- a/parser/midas.py
+++ b/parser/midas.py
@@ -0,0 +1,217 @@
 from typing import Optional
 from core.ast.midas import (
    ConstraintExpr,
    ConstraintStmt,
    Expr,
    LiteralExpr,
    OpStmt,
    PropertyStmt,
    Stmt,
    TypeBodyExpr,
    TypeExpr,
    TypeStmt,
    WildcardExpr,
 )
 from lexer.token import Token, TokenType
 from parser.base import Parser
 from parser.errors import ParsingError
 class MidasParser(Parser):
    """Recursive-descent parser for Midas type definitions"""

    # Keywords starting a declaration: safe places to resume after an error
    SYNC_BOUNDARY: set[TokenType] = {TokenType.TYPE, TokenType.OP, TokenType.CONSTRAINT}

    def parse(self) -> list[Stmt]:
        """Parse declarations until EOF or until the first failed declaration

        Returns:
            list[Stmt]: the statements parsed before stopping
        """
        collected: list[Stmt] = []
        while not self.is_at_end():
            parsed: Optional[Stmt] = self.declaration()
            if parsed is not None:
                collected.append(parsed)
                continue
            # A failed declaration currently ends the whole parse
            print("Early stop")
            break
        return collected

    def synchronize(self):
        """Skip tokens until a synchronization boundary is found

        This method allows gracefully recovering from a parse error
        to a safe place and continue parsing
        """
        self.advance()
        while not self.is_at_end():
            # NOTE(review): NEWLINE is listed in `Parser.IGNORE`, so this check
            # presumably never succeeds unless IGNORE is overridden
            just_passed_newline: bool = self.previous().type == TokenType.NEWLINE
            if just_passed_newline or self.peek().type in self.SYNC_BOUNDARY:
                return
            self.advance()

    def declaration(self) -> Optional[Stmt]:
        """Try and parse a declaration

        The leading keyword selects the kind of declaration. Any parsing error
        is caught, the parser is resynchronized and None is returned

        Returns:
            Optional[Stmt]: the parsed Midas statement, or None if a ParsingError was raised
        """
        handlers = (
            (TokenType.TYPE, self.type_declaration),
            (TokenType.OP, self.op_declaration),
            (TokenType.CONSTRAINT, self.constraint_declaration),
        )
        try:
            for keyword, handler in handlers:
                if self.match(keyword):
                    return handler()
            raise self.error(self.peek(), "Unexpected token")
        except ParsingError:
            self.synchronize()
            return None

    def type_declaration(self) -> TypeStmt:
        """Parse a type declaration

        A type declaration is written `type Name<TypeExpr, ...>` optionally
        followed by a brace-wrapped body. The `type` keyword must already have
        been consumed

        Returns:
            TypeStmt: the parsed type declaration statement
        """
        type_name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
        self.consume(TokenType.LESS, "Expected '<' after type name")
        parents: list[TypeExpr] = []
        while not (self.check(TokenType.GREATER) or self.is_at_end()):
            parents.append(self.type_expr())
            if self.check(TokenType.GREATER):
                continue
            self.consume(TokenType.COMMA, "Expected ',' between type bases")
        self.consume(TokenType.GREATER, "Expected '>' after base type")
        body: Optional[TypeBodyExpr] = (
            self.type_body_expr() if self.check(TokenType.LEFT_BRACE) else None
        )
        return TypeStmt(name=type_name, bases=parents, body=body)

    def type_expr(self) -> TypeExpr:
        """Parse a type name followed by any number of `+ (constraint)` suffixes

        Returns:
            TypeExpr: the parsed type expression
        """
        base: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
        found: list[ConstraintExpr] = []
        while True:
            if self.is_at_end() or not self.match(TokenType.PLUS):
                break
            self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint")
            found.append(self.constraint_expr())
            self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint")
        return TypeExpr(name=base, constraints=found)

    def constraint_expr(self) -> ConstraintExpr:
        """Parse a type constraint of the form `value operator value`

        Returns:
            ConstraintExpr: the parsed type constraint expression
        """
        lhs: Expr = self.constraint_value()
        operator: Token = self.constraint_operator()
        rhs: Expr = self.constraint_value()
        return ConstraintExpr(left=lhs, op=operator, right=rhs)

    def constraint_value(self) -> Expr:
        """Parse a constraint operand: the wildcard `_` or a literal

        Returns:
            Expr: a wildcard expression or a literal expression
        """
        if not self.match(TokenType.UNDERSCORE):
            return self.literal()
        return WildcardExpr(self.previous())

    def literal(self) -> LiteralExpr:
        """Parse a boolean, none or number literal

        Returns:
            LiteralExpr: the parsed literal expression
        """
        constants = {
            TokenType.FALSE: False,
            TokenType.TRUE: True,
            TokenType.NONE: None,
        }
        if self.match(*constants):
            return LiteralExpr(constants[self.previous().type])
        if self.match(TokenType.NUMBER):
            return LiteralExpr(self.previous().value)
        raise self.error(self.peek(), "Expected literal")

    def constraint_operator(self) -> Token:
        """Parse a comparison operator

        Returns:
            Token: the consumed operator token
        """
        comparison_ops = (
            TokenType.LESS,
            TokenType.LESS_EQUAL,
            TokenType.GREATER,
            TokenType.GREATER_EQUAL,
            TokenType.EQUAL_EQUAL,
            TokenType.BANG_EQUAL,
        )
        if not self.match(*comparison_ops):
            raise self.error(self.peek(), "Expected constraint operator")
        return self.previous()

    def type_body_expr(self) -> TypeBodyExpr:
        """Parse a type definition body

        A type definition body is a sequence of property statements enclosed
        in curly braces

        Returns:
            TypeBodyExpr: the parsed type body expression
        """
        self.consume(TokenType.LEFT_BRACE, "Expected '{' to start type body")
        members: list[PropertyStmt] = []
        while not (self.check(TokenType.RIGHT_BRACE) or self.is_at_end()):
            members.append(self.property_stmt())
        self.consume(TokenType.RIGHT_BRACE, "Unclosed type body")
        return TypeBodyExpr(properties=members)

    def property_stmt(self) -> PropertyStmt:
        """Parse a property statement, written `name: Type`

        Returns:
            PropertyStmt: the parsed property statement
        """
        prop_name: Token = self.consume(TokenType.IDENTIFIER, "Expected property name")
        self.consume(TokenType.COLON, "Expected ':' after property name")
        prop_type: TypeExpr = self.type_expr()
        return PropertyStmt(name=prop_name, type=prop_type)

    def op_declaration(self) -> OpStmt:
        """Parse an operation definition

        An operation is written `op <Type1> operator <Type2> = <Type3>` where
        `operator` can be any single token. The `op` keyword must already have
        been consumed

        Returns:
            OpStmt: the parsed operation statement
        """
        self.consume(TokenType.LESS, "Expected '<' before first type")
        first_operand: TypeExpr = self.type_expr()
        self.consume(TokenType.GREATER, "Expected '>' after first type")

        # Whatever token comes next is taken as the operator, unchecked
        operator: Token = self.advance()

        self.consume(TokenType.LESS, "Expected '<' before second type")
        second_operand: TypeExpr = self.type_expr()
        self.consume(TokenType.GREATER, "Expected '>' after second type")

        self.consume(TokenType.EQUAL, "Expected '=' after second type")

        self.consume(TokenType.LESS, "Expected '<' before result type")
        outcome: TypeExpr = self.type_expr()
        self.consume(TokenType.GREATER, "Expected '>' after result type")
        return OpStmt(
            left=first_operand, op=operator, right=second_operand, result=outcome
        )

    def constraint_declaration(self) -> ConstraintStmt:
        """Parse a type constraint declaration

        A constraint is written `constraint Name = constraint_expression`.
        The `constraint` keyword must already have been consumed

        Returns:
            ConstraintStmt: the parsed constraint declaration statement
        """
        constraint_name: Token = self.consume(
            TokenType.IDENTIFIER, "Expected constraint name"
        )
        self.consume(TokenType.EQUAL, "Expected '=' after constraint name")
        expression: ConstraintExpr = self.constraint_expr()
        return ConstraintStmt(name=constraint_name, constraint=expression)
--- a/syntax/annotations.ebnf
+++ b/syntax/annotations.ebnf
@@ -0,0 +1,20 @@
 identifier ::= '[a-zA-Z][a-zA-Z0-9_]*'
 integer ::= '\d+'
 number ::= integer ["." integer]
 boolean ::= "False" | "True"
 none ::= "None"
 value ::= number | boolean | none
 lambda-value ::= "_" | value
 lambda-operator ::= ">" | "<" | ">=" | "<=" | "==" | "!="
 lambda ::= lambda-value lambda-operator lambda-value
 constraint ::= identifier | "(" lambda ")"
 base-type ::= identifier
 type ::= base-type { "+" constraint }
 column-type ::= type | "_"
 column-def ::= [ identifier ":" ] column-type
 frame-def ::= column-def { "," column-def }
--- a/syntax/annotations.typ
+++ b/syntax/annotations.typ
@@ -0,0 +1,74 @@
 #import "@preview/fervojo:0.1.1": render
 // Each rule below is a raw block holding one diagram description,
 // passed to fervojo's `render` at the end of the file

 // Literal values: a number, a boolean or none
 #let value = ```
 {[`value` <
  [`number` 'digit' * ! <!, ["." 'digit' * !]>],
  [`boolean` <"False", "True">],
  [`none` "None"]
 >]}
 ```
 // A comparison between two operands, each being `_` or a value
 #let constraint = ```
 {[`constraint` <"_", 'value'> <">", "<", ">=", "<=", "==", "!="> <"_", 'value'>]}
 ```
 // An identifier with optional `+ (constraint)` suffixes
 #let type-with-constraints = ```
 {[`type-with-constraints` 'identifier' <!, ["+" "(" 'constraint' ")"] * !>]}
 ```
 // A column: an optional `name:` prefix followed by `_` or a type
 #let column-def = ```
 {[`column-def` <!, ['identifier' ":"]> <"_", 'type-with-constraints'>]}
 ```
 // A frame: column definitions separated by commas
 #let frame-def = ```
 {[`frame-def` 'column-def' * ","]}
 ```
 // A full annotation: an identifier with an optional bracketed frame definition
 #let annotation = ```
 {[`annotation` 'identifier' <!, ["[" 'frame-def' "]"]>]}
 ```
 // The rules, in the order in which they are rendered
 #let rules = (
  value,
  constraint,
  type-with-constraints,
  column-def,
  frame-def,
  annotation,
 )
 #set text(font: "Source Sans 3")
 = Type annotation syntax
 // Render every rule, one after the other
 #for rule in rules {
  render(rule)
 }
 /*
 #let by-name = (
  annotation: annotation,
  frame-def: frame-def,
  column-def: column-def,
  type-with-constraints: type-with-constraints,
  constraint: constraint,
  value: value,
 )
 #let substitute(base-rule) = {
  let new-rule = base-rule
  for (key, rule) in by-name.pairs() {
    new-rule = new-rule.replace("'" + key + "'", rule.text.slice(1, -1))
  }
  if new-rule != base-rule {
    new-rule = substitute(new-rule)
  }
  return new-rule
 }
 #let combined = raw(substitute(annotation.text))
 #set page(flipped: true)
 #render(combined)
 */
--- a/syntax/midas.ebnf
+++ b/syntax/midas.ebnf
@@ -0,0 +1,26 @@
 identifier ::= '[a-zA-Z][a-zA-Z0-9_]*'
 integer ::= '\d+'
 number ::= integer ["." integer]
 boolean ::= "false" | "true"
 none ::= "none"
 value ::= number | boolean | none
 lambda-value ::= "_" | value
 lambda-operator ::= ">" | "<" | ">=" | "<=" | "==" | "!="
 lambda ::= lambda-value lambda-operator lambda-value
 constraint ::= identifier | "(" lambda ")"
 base-type ::= identifier
 type ::= base-type { "+" constraint }
 type-property ::= 'identifier' ":" 'type'
 type-body ::= "{" { 'type-property' } "}"
 operation-type ::= "<" 'type' ">"
 type-statement ::= "type" 'identifier' "<" 'type' {"," 'type'} ">" ['type-body']
 operation-statement ::= "op" 'operation-type' 'operator' 'operation-type' "=" 'operation-type'
 constraint-statement ::= "constraint" 'identifier' "=" 'lambda'
 statement ::= type-statement | operation-statement | constraint-statement
--- a/syntax/midas.typ
+++ b/syntax/midas.typ
@@ -0,0 +1,97 @@
 #import "@preview/fervojo:0.1.1": render
 // Each rule below is a raw block holding one diagram description,
 // passed to fervojo's `render` at the end of the file

 // Literal values: a number, a boolean or none
 #let value = ```
 {[`value` <
  [`number` 'digit' * ! <!, ["." 'digit' * !]>],
  [`boolean` <"False", "True">],
  [`none` "None"]
 >]}
 ```
 // A comparison between two operands, each being `_` or a value
 #let constraint = ```
 {[`constraint` <"_", 'value'> <">", "<", ">=", "<=", "==", "!="> <"_", 'value'>]}
 ```
 // An identifier with optional `+ (constraint)` suffixes
 #let type-with-constraints = ```
 {[`type-with-constraints` 'identifier' <!, ["+" "(" 'constraint' ")"] * !>]}
 ```
 // A property of a type body, written `name: type`
 #let type-property = ```
 {[`type-property` 'identifier' ":" 'type-with-constraints']}
 ```
 // A brace-wrapped list of properties (possibly empty)
 #let type-body = ```
 {[`type-body` "{" <!, 'type-property'*!> "}"]}
 ```
 // A type wrapped in angle brackets, as used by operation statements
 #let operation-type = ```
 {[`operation-type` "<" 'type-with-constraints' ">"]}
 ```
 // `type Name<bases>` with an optional body
 #let type-statement = ```
 {[`type-statement` "type" 'identifier' "<" 'type-with-constraints'*"," ">" <!, 'type-body'>]}
 ```
 // `op <left> operator <right> = <result>`
 #let operation-statement = ```
 {[`operation-statement` "op" 'operation-type' "operator" 'operation-type' "=" 'operation-type']}
 ```
 // `constraint Name = constraint`
 #let constraint-statement = ```
 {[`constraint-statement` "constraint" 'identifier' "=" 'constraint']}
 ```
 // Any of the three kinds of statements
 #let statement = ```
 {[`statement` <'type-statement', 'operation-statement', 'constraint-statement'>]}
 ```
 // The rules, in the order in which they are rendered
 #let rules = (
  value,
  constraint,
  type-with-constraints,
  type-property,
  type-body,
  operation-type,
  type-statement,
  operation-statement,
  constraint-statement,
  statement,
 )
 #set text(font: "Source Sans 3")
 = Midas type definition syntax
 // Render every rule, one after the other
 #for rule in rules {
  render(rule)
 }
 /*
 #let by-name = (
  value: value,
  constraint: constraint,
  type-with-constraints: type-with-constraints,
  type-property: type-property,
  type-body: type-body,
  operation-type: operation-type,
  type-statement: type-statement,
  operation-statement: operation-statement,
  constraint-statement: constraint-statement,
 )
 #let substitute(base-rule) = {
  let new-rule = base-rule
  for (key, rule) in by-name.pairs() {
    new-rule = new-rule.replace("'" + key + "'", rule.text.slice(1, -1))
  }
  if new-rule != base-rule {
    new-rule = substitute(new-rule)
  }
  return new-rule.replace(regex("`.*?`"), "")
 }
 #let combined = raw(substitute(statement.text))
 #set page(flipped: true)
 #render(combined)
 */
--- a/test.py
+++ b/test.py
@@ -0,0 +1,52 @@
 import importlib
 from pathlib import Path
 from core.ast.printer import AnnotationAstPrinter, MidasAstPrinter
 from lexer.annotations import AnnotationLexer
 from lexer.midas import MidasLexer
 from lexer.token import Token
 from parser.annotations import AnnotationParser
 from parser.midas import MidasParser
 def test_annotation():
    """Lex, parse and print the `df` annotation of the simple types example

    The parsed statement and every recorded parsing error are printed, then
    the AST is pretty-printed if parsing produced a statement
    """
    # Frame annotation
    example = importlib.import_module("examples.00_syntax_prototype.01_simple_types")
    source: str = example.__annotations__["df"]
    tokens: list[Token] = AnnotationLexer(source, "01_simple_types.py").process()
    parser = AnnotationParser(tokens)
    tree = parser.parse()
    print(tree)
    for issue in parser.errors:
        print(issue.get_report())
    printer = AnnotationAstPrinter()
    if tree is None:
        return
    print(printer.print(tree))
 def test_midas():
    """Lex, parse and print the Midas type definitions of the custom types example

    The parsed statements and every recorded parsing error are printed, then
    each statement is pretty-printed
    """
    # Midas type definitions
    path: Path = Path("examples") / "00_syntax_prototype" / "02_custom_types.midas"
    # Decode explicitly as UTF-8 instead of relying on the platform's default encoding
    definitions: str = path.read_text(encoding="utf-8")
    midas_lexer: MidasLexer = MidasLexer(definitions, path.name)
    tokens: list[Token] = midas_lexer.process()
    parser = MidasParser(tokens)
    parsed = parser.parse()
    print(parsed)
    for err in parser.errors:
        print(err.get_report())
    printer = MidasAstPrinter()
    # `MidasParser.parse` only appends successfully parsed statements,
    # so the list never contains None
    for stmt in parsed:
        print(printer.print(stmt))
 # Only run the demo when executed as a script, not when the module is imported
 if __name__ == "__main__":
    test_midas()
--- a/tests/lexer/test_annotation_lexer.py
+++ b/tests/lexer/test_annotation_lexer.py
@@ -0,0 +1,129 @@
 from typing import Any
 import pytest
 from lexer.annotations import AnnotationLexer
 from lexer.token import Token, TokenType
 def scan(source: str) -> list[Token]:
    """Run the annotation lexer on the given source and return its tokens"""
    lexer = AnnotationLexer(source)
    return lexer.process()
 def assert_n_tokens(tokens: list[Token], n: int):
    """Assert that the list holds exactly `n` tokens followed by the EOF token"""
    expected_length = n + 1  # the trailing EOF token is not counted in `n`
    assert len(tokens) == expected_length
    last = tokens[-1]
    assert last.type == TokenType.EOF
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("(", TokenType.LEFT_PAREN),
        (")", TokenType.RIGHT_PAREN),
        ("[", TokenType.LEFT_BRACKET),
        ("]", TokenType.RIGHT_BRACKET),
        (":", TokenType.COLON),
        (",", TokenType.COMMA),
        ("_", TokenType.UNDERSCORE),
    ],
)
def test_punctuation(src: str, expected: TokenType):
    """Each punctuation source must lex to one token (plus EOF) of the expected type."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("+", TokenType.PLUS),
        (">", TokenType.GREATER),
        (">=", TokenType.GREATER_EQUAL),
        ("<", TokenType.LESS),
        ("<=", TokenType.LESS_EQUAL),
        ("=", TokenType.EQUAL),
        ("==", TokenType.EQUAL_EQUAL),
        ("!=", TokenType.BANG_EQUAL),
    ],
)
def test_operators(src: str, expected: TokenType):
    """Each operator source must lex to one token (plus EOF) of the expected type."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("a", TokenType.IDENTIFIER),
        ("foo", TokenType.IDENTIFIER),
        ("foo1", TokenType.IDENTIFIER),
        ("foo_", TokenType.IDENTIFIER),
        ("foo_bar1_baz2", TokenType.IDENTIFIER),
        ("FOO_BAR1_BAZ2", TokenType.IDENTIFIER),
        ("True", TokenType.TRUE),
        ("False", TokenType.FALSE),
        ("None", TokenType.NONE),
    ],
)
def test_identifiers_keywords(src: str, expected: TokenType):
    """Identifiers and the ``True``/``False``/``None`` keywords lex to one token each."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("#", TokenType.COMMENT),
        ("# This is a comment", TokenType.COMMENT),
        (" ", TokenType.WHITESPACE),
        ("\t", TokenType.WHITESPACE),
        ("\r", TokenType.WHITESPACE),
        ("  \t  \t", TokenType.WHITESPACE),
        ("\n", TokenType.NEWLINE),
    ],
)
def test_misc(src: str, expected: TokenType):
    """Comments, whitespace runs and newlines each lex to one token of the expected type."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    ("src", "expected_type", "expected_value"),
    [
        ("0", TokenType.NUMBER, 0),
        ("0.0", TokenType.NUMBER, 0),
        ("1234.56", TokenType.NUMBER, 1234.56),
    ],
)
def test_literals(src: str, expected_type: TokenType, expected_value: Any):
    """Numeric literals lex to one token carrying the expected type and value."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    literal = scanned[0]
    assert literal.type == expected_type
    assert literal.value == expected_value
 def test_single_bang_error():
    """A lone ``!`` (not followed by ``=``) must make the lexer raise ``SyntaxError``."""
    lone_bang = "!"
    with pytest.raises(SyntaxError):
        scan(lone_bang)
@pytest.mark.parametrize(
    "src",
    [
        "-",
        "*",
        "/",
        "{",
        "}",
        "@",
        '"',
        "'",
        ".",
    ],
)
def test_unexpected_character(src: str):
    """Each listed character must make the annotation lexer raise ``SyntaxError``."""
    unsupported = src
    with pytest.raises(SyntaxError):
        scan(unsupported)
--- a/tests/lexer/test_midas_lexer.py
+++ b/tests/lexer/test_midas_lexer.py
@@ -0,0 +1,129 @@
 from typing import Any
 import pytest
 from lexer.midas import MidasLexer
 from lexer.token import Token, TokenType
 def scan(source: str) -> list[Token]:
    """Tokenize *source* with a fresh ``MidasLexer`` and return the tokens."""
    lexer = MidasLexer(source)
    return lexer.process()
 def assert_n_tokens(tokens: list[Token], n: int):
    """Assert that *tokens* holds *n* tokens followed by one trailing EOF token."""
    expected_length = n + 1  # the extra slot is the trailing EOF token
    assert len(tokens) == expected_length
    last = tokens[-1]
    assert last.type == TokenType.EOF
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("(", TokenType.LEFT_PAREN),
        (")", TokenType.RIGHT_PAREN),
        ("[", TokenType.LEFT_BRACKET),
        ("]", TokenType.RIGHT_BRACKET),
        ("{", TokenType.LEFT_BRACE),
        ("}", TokenType.RIGHT_BRACE),
        (":", TokenType.COLON),
        (",", TokenType.COMMA),
        ("_", TokenType.UNDERSCORE),
    ],
)
def test_punctuation(src: str, expected: TokenType):
    """Each punctuation source must lex to one token (plus EOF) of the expected type."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("+", TokenType.PLUS),
        ("-", TokenType.MINUS),
        ("*", TokenType.STAR),
        ("/", TokenType.SLASH),
        (">", TokenType.GREATER),
        (">=", TokenType.GREATER_EQUAL),
        ("<", TokenType.LESS),
        ("<=", TokenType.LESS_EQUAL),
        ("=", TokenType.EQUAL),
        ("==", TokenType.EQUAL_EQUAL),
        ("!=", TokenType.BANG_EQUAL),
    ],
)
def test_operators(src: str, expected: TokenType):
    """Each operator source must lex to one token (plus EOF) of the expected type."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("a", TokenType.IDENTIFIER),
        ("foo", TokenType.IDENTIFIER),
        ("foo1", TokenType.IDENTIFIER),
        ("foo_", TokenType.IDENTIFIER),
        ("foo_bar1_baz2", TokenType.IDENTIFIER),
        ("FOO_BAR1_BAZ2", TokenType.IDENTIFIER),
        ("true", TokenType.TRUE),
        ("false", TokenType.FALSE),
        ("none", TokenType.NONE),
    ],
)
def test_identifiers_keywords(src: str, expected: TokenType):
    """Identifiers and the ``true``/``false``/``none`` keywords lex to one token each."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("// This is a comment", TokenType.COMMENT),
        ("/* This is a comment */", TokenType.COMMENT),
        (" ", TokenType.WHITESPACE),
        ("\t", TokenType.WHITESPACE),
        ("\r", TokenType.WHITESPACE),
        ("  \t  \t", TokenType.WHITESPACE),
        ("\n", TokenType.NEWLINE),
    ],
)
def test_misc(src: str, expected: TokenType):
    """Comments, whitespace runs and newlines each lex to one token of the expected type."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    ("src", "expected_type", "expected_value"),
    [
        ("0", TokenType.NUMBER, 0),
        ("0.0", TokenType.NUMBER, 0),
        ("1234.56", TokenType.NUMBER, 1234.56),
    ],
)
def test_literals(src: str, expected_type: TokenType, expected_value: Any):
    """Numeric literals lex to one token carrying the expected type and value."""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    literal = scanned[0]
    assert literal.type == expected_type
    assert literal.value == expected_value
 def test_single_bang_error():
    """A lone ``!`` (not followed by ``=``) must make the lexer raise ``SyntaxError``."""
    lone_bang = "!"
    with pytest.raises(SyntaxError):
        scan(lone_bang)
@pytest.mark.parametrize(
    "src",
    [
        "@",
        '"',
        "'",
        ".",
    ],
)
def test_unexpected_character(src: str):
    """Each listed character must make the Midas lexer raise ``SyntaxError``."""
    unsupported = src
    with pytest.raises(SyntaxError):
        scan(unsupported)
--- a/tests/parser/test_annotation_parser.py
+++ b/tests/parser/test_annotation_parser.py
@@ -0,0 +1,130 @@
 from typing import Optional
 import pytest
 from core.ast.annotations import (
    AnnotationStmt,
    ConstraintExpr,
    Expr,
    LiteralExpr,
    SchemaElementExpr,
    SchemaExpr,
    Stmt,
    TypeExpr,
    WildcardExpr,
 )
 from lexer.annotations import AnnotationLexer
 from lexer.position import Position
 from lexer.token import Token
 from parser.annotations import AnnotationParser
 class AstSerializer(Stmt.Visitor[str], Expr.Visitor[str]):
    """Visitor that renders an annotation AST as a compact, parenthesised string.

    The output is only used by the tests in this module to compare parse results
    against expected strings.
    """
    def serialize(self, stmt: Stmt):
        """Return the string form of *stmt* by dispatching to the matching ``visit_*``."""
        return stmt.accept(self)
    def visit_annotation_stmt(self, stmt: AnnotationStmt) -> str:
        """Render ``(annotation NAME)`` or ``(annotation NAME SCHEMA)``."""
        if stmt.schema is None:
            return f"(annotation {stmt.name.lexeme})"
        return f"(annotation {stmt.name.lexeme} {stmt.schema.accept(self)})"
    def visit_schema_expr(self, expr: SchemaExpr) -> str:
        """Render ``(schema E1 E2 ...)``; an empty schema yields ``(schema )``."""
        rendered = " ".join(element.accept(self) for element in expr.elements)
        return f"(schema {rendered})"
    def visit_schema_element_expr(self, expr: SchemaElementExpr) -> str:
        """Render ``(NAME TYPE)``, using ``_`` for a missing name or type."""
        name: str = "_" if expr.name is None else expr.name.lexeme
        type_repr: str = "_" if expr.type is None else expr.type.accept(self)
        return f"({name} {type_repr})"
    def visit_type_expr(self, expr: TypeExpr) -> str:
        """Render ``(NAME C1 C2 ...)`` with one entry per constraint."""
        constraints = "".join(f" {item.accept(self)}" for item in expr.constraints)
        return f"({expr.name.lexeme}{constraints})"
    def visit_constraint_expr(self, expr: ConstraintExpr) -> str:
        """Render ``(constraint LEFT OP RIGHT)``."""
        left = expr.left.accept(self)
        operator = expr.op.lexeme
        right = expr.right.accept(self)
        return f"(constraint {left} {operator} {right})"
    def visit_wildcard_expr(self, expr: WildcardExpr) -> str:
        """Render the wildcard as the fixed string ``(_)``."""
        return "(_)"
    def visit_literal_expr(self, expr: LiteralExpr) -> str:
        """Render the literal's value wrapped in parentheses."""
        return f"({expr.value})"
 def parse(source: str) -> Optional[Stmt]:
    """Lex *source* as an annotation and return whatever ``AnnotationParser.parse`` yields."""
    annotation_parser = AnnotationParser(AnnotationLexer(source).process())
    return annotation_parser.parse()
 def must_parse(source: str) -> Stmt:
    """Parse *source* and assert that the parser returned a statement (not ``None``)."""
    parsed: Optional[Stmt] = parse(source)
    assert parsed is not None
    return parsed
 def ast_str(source: str) -> str:
    """Parse *source* (which must succeed) and return its ``AstSerializer`` rendering."""
    serializer = AstSerializer()
    return serializer.serialize(must_parse(source))
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("Type", "(annotation Type)"),
        ("Type[]", "(annotation Type (schema ))"),
        (
            """
            Frame[
                verified: bool,
                birth_year: int,
                height: float + ( _ > 0 ) + ( _ < 250 ),
                name: str,
                date: datetime,
                float,  # unnamed
                unknown: _,  # untyped
                _  # unnamed and untyped
            ]
            """,
            "(annotation Frame (schema (verified (bool)) (birth_year (int)) (height (float (constraint (_) > (0.0)) (constraint (_) < (250.0)))) (name (str)) (date (datetime)) (_ (float)) (unknown _) (_ _)))",
        ),
    ],
)
def test_expressions(src: str, expected: str):
    """A valid annotation source must serialize to the expected AST string."""
    rendered = ast_str(src)
    assert rendered == expected
@pytest.mark.parametrize(
    ("src", "pos", "should_fail"),
    [
        ("", (1, 1), True),
        ("42", (1, 1), True),
        ("True", (1, 1), True),
        ("Type[", (1, 6), True),
        ("Type[] Type2", (1, 8), False),
        ("Type[bool:]", (1, 11), True),
        ("Type[3]", (1, 6), True),
        ("Type[bool float]", (1, 11), True),
        ("Type[bool (_ < 2)]", (1, 11), True),
        ("Type[bool + _ < 2)]", (1, 13), True),
        ("Type[bool + (_ < 2]", (1, 19), True),
        ("Type[bool + (< 2)]", (1, 14), True),
        ("Type[bool + (_ + 2)]", (1, 16), True),
        ("Type[bool + (Foo + Bar)]", (1, 14), True),
        # ("Type[bool,]", (1, 11), True),  # trailing comma is accepted, TODO: update parser or EBNF
        ("Type[bool, Type[]]", (1, 16), True),
        ("Type[foo: 3]", (1, 11), True),
    ],
)
def test_parsing_error(src: str, pos: tuple[int, int], should_fail: bool):
    """An invalid source must report its first error at ``pos`` (line, column).

    When ``should_fail`` is true the parser must additionally return ``None``;
    otherwise only the presence and position of the first error are checked.
    """
    annotation_parser: AnnotationParser = AnnotationParser(AnnotationLexer(src).process())
    result: Optional[Stmt] = annotation_parser.parse()
    if should_fail:
        assert result is None
    assert len(annotation_parser.errors) > 0
    where: Position = annotation_parser.errors[0].token.position
    assert (where.line, where.column) == pos
--- a/tests/parser/test_midas_parser.py
+++ b/tests/parser/test_midas_parser.py
@@ -0,0 +1,202 @@
 import textwrap
 import pytest
 from core.ast.midas import (
    ConstraintExpr,
    ConstraintStmt,
    Expr,
    LiteralExpr,
    OpStmt,
    PropertyStmt,
    Stmt,
    TypeBodyExpr,
    TypeExpr,
    TypeStmt,
    WildcardExpr,
 )
 from lexer.midas import MidasLexer
 from lexer.position import Position
 from lexer.token import Token
 from parser.midas import MidasParser
 class AstSerializer(Stmt.Visitor[str], Expr.Visitor[str]):
    """Visitor that renders a Midas AST as a compact, parenthesised string.

    The output is only used by the tests in this module to compare parse results
    against expected strings.
    """
    def serialize(self, stmt: Stmt):
        """Return the string form of *stmt* by dispatching to the matching ``visit_*``."""
        return stmt.accept(self)
    def visit_type_stmt(self, stmt: TypeStmt) -> str:
        """Render ``(type_def NAME BASE... [BODY])``."""
        pieces: list[str] = [f"(type_def {stmt.name.lexeme}"]
        pieces.extend(base.accept(self) for base in stmt.bases)
        if stmt.body is not None:
            pieces.append(stmt.body.accept(self))
        return " ".join(pieces) + ")"
    def visit_type_expr(self, expr: TypeExpr) -> str:
        """Render ``(NAME C1 C2 ...)`` with one entry per constraint."""
        constraints = "".join(f" {item.accept(self)}" for item in expr.constraints)
        return f"({expr.name.lexeme}{constraints})"
    def visit_constraint_expr(self, expr: ConstraintExpr) -> str:
        """Render ``(constraint LEFT OP RIGHT)``."""
        left = expr.left.accept(self)
        operator = expr.op.lexeme
        right = expr.right.accept(self)
        return f"(constraint {left} {operator} {right})"
    def visit_wildcard_expr(self, expr: WildcardExpr) -> str:
        """Render the wildcard as the fixed string ``(_)``."""
        return "(_)"
    def visit_literal_expr(self, expr: LiteralExpr) -> str:
        """Render the literal's value wrapped in parentheses."""
        return f"({expr.value})"
    def visit_type_body_expr(self, expr: TypeBodyExpr) -> str:
        """Render ``(body P1 P2 ...)``; an empty body yields ``(body)``."""
        pieces: list[str] = ["(body"]
        pieces.extend(prop.accept(self) for prop in expr.properties)
        return " ".join(pieces) + ")"
    def visit_property_stmt(self, stmt: PropertyStmt) -> str:
        """Render ``(property NAME TYPE)``."""
        prop_name = stmt.name.lexeme
        prop_type = stmt.type.accept(self)
        return f"(property {prop_name} {prop_type})"
    def visit_op_stmt(self, stmt: OpStmt) -> str:
        """Render ``(op_def LEFT OP RIGHT RESULT)``."""
        left, right, result = (
            operand.accept(self) for operand in (stmt.left, stmt.right, stmt.result)
        )
        return f"(op_def {left} {stmt.op.lexeme} {right} {result})"
    def visit_constraint_stmt(self, stmt: ConstraintStmt) -> str:
        """Render ``(constraint_def NAME CONSTRAINT)``."""
        constraint_name = stmt.name.lexeme
        body = stmt.constraint.accept(self)
        return f"(constraint_def {constraint_name} {body})"
 def parse(source: str) -> list[Stmt]:
    """Lex *source* as Midas code and return the statements produced by ``MidasParser``."""
    midas_parser = MidasParser(MidasLexer(source).process())
    return midas_parser.parse()
 def ast_str(source: str) -> list[str]:
    """Parse *source* and return one ``AstSerializer`` rendering per parsed statement."""
    return [AstSerializer().serialize(node) for node in parse(source)]
@pytest.mark.parametrize(
    ("src", "expected"),
    [
        ("type Foo<>", "(type_def Foo)"),
        ("type Foo<Bar>", "(type_def Foo (Bar))"),
        ("type Foo<Bar, Baz>", "(type_def Foo (Bar) (Baz))"),
        (
            "type Foo<Bar + (_ < 2), Baz>",
            "(type_def Foo (Bar (constraint (_) < (2.0))) (Baz))",
        ),
        (
            """
            type Foo<> {
                foo: Bar
            }
            """,
            "(type_def Foo (body (property foo (Bar))))",
        ),
        (
            """
            type Foo<> {
                foo: Bar + (_ != none)
                foo2: Bar2 + (0 <= _) + (_ <= 100)
            }
            """,
            "(type_def Foo (body (property foo (Bar (constraint (_) != (None)))) (property foo2 (Bar2 (constraint (0.0) <= (_)) (constraint (_) <= (100.0))))))",
        ),
        ("op <A> + <B> = <C>", "(op_def (A) + (B) (C))"),
        (
            "op <A + (_ < 100)> + <B + (_ < 100)> = <C + (_ < 200)>",
            "(op_def (A (constraint (_) < (100.0))) + (B (constraint (_) < (100.0))) (C (constraint (_) < (200.0))))",
        ),
        (
            "constraint Positive = _ >= 0",
            "(constraint_def Positive (constraint (_) >= (0.0)))",
        ),
    ],
)
def test_expressions(src: str, expected: str | list[str]):
    """A valid Midas source must serialize to the expected AST string(s).

    A single expected string is treated as a one-element list.
    """
    wanted = [expected] if isinstance(expected, str) else expected
    assert ast_str(src) == wanted
@pytest.mark.parametrize(
    ("src", "pos"),
    [
        # --- Misc ---
        ("42", (1, 1)),
        ("true", (1, 1)),
        ("foo", (1, 1)),
        # --- Type statements ---
        ("type", (1, 5)),
        ("type true", (1, 6)),
        ("type Foo", (1, 9)),
        ("type Foo<1>", (1, 10)),
        # ("type Foo<float,>", (1, 16)),  # trailing comma is accepted, TODO: update parser or EBNF
        ("type Foo<float, 1>", (1, 17)),
        ("type Foo<float", (1, 15)),
        ("type Foo<float> { 3 }", (1, 19)),
        (
            """
            type Foo<float> {
                foo
            }
            """,
            (4, 1),
        ),
        (
            """
            type Foo<float> {
                foo: 3
            }
            """,
            (3, 10),
        ),
        # --- Operation statements ---
        ("op", (1, 3)),
        ("op float", (1, 4)),
        ("op <", (1, 5)),
        ("op <float", (1, 10)),
        ("op <float>", (1, 11)),
        ("op <float> +", (1, 13)),
        ("op <float> + float", (1, 14)),
        ("op <float> + <", (1, 15)),
        ("op <float> + <float", (1, 20)),
        ("op <float> + <float>", (1, 21)),
        ("op <float> + <float> =", (1, 23)),
        ("op <float> + <float> = float", (1, 24)),
        ("op <float> + <float> = <", (1, 25)),
        ("op <float> + <float> = <float", (1, 30)),
        ("op <float + 3> + <float> = <float>", (1, 13)),
        ("op <float> + <float + 3> = <float>", (1, 23)),
        ("op <float> + <float> = <float + 3>", (1, 33)),
        # --- Constraint statements ---
        ("constraint", (1, 11)),
        ("constraint 3", (1, 12)),
        ("constraint Foo", (1, 15)),
        ("constraint Foo =", (1, 17)),
        ("constraint Foo = 3", (1, 19)),
        ("constraint Foo = 3 <", (1, 21)),
    ],
)
def test_parsing_error(src: str, pos: tuple[int, int]):
    """An invalid Midas source must yield no statements and a first error at ``pos``.

    The source is dedented first so that the expected (line, column) positions of
    the multi-line cases are relative to the dedented text.
    """
    dedented = textwrap.dedent(src)
    midas_parser: MidasParser = MidasParser(MidasLexer(dedented).process())
    statements: list[Stmt] = midas_parser.parse()
    assert len(statements) == 0
    assert len(midas_parser.errors) > 0
    where: Position = midas_parser.errors[0].token.position
    assert (where.line, where.column) == pos
--- a/vscode-ext/language-configurations.json
+++ b/vscode-ext/language-configurations.json
@@ -0,0 +1,19 @@
 {
    "brackets": [
        ["{", "}"],
        ["[", "]"],
        ["(", ")"],
        ["<", ">"]
    ],
    "autoClosingPairs": [
        { "open": "{", "close": "}" },
        { "open": "[", "close": "]" },
        { "open": "(", "close": ")" },
        { "open": "<", "close": ">" }
    ],
    "surroundingPairs": [
        ["{", "}"],
        ["[", "]"],
        ["(", ")"],
        ["<", ">"]
    ]
 }
--- a/vscode-ext/package.json
+++ b/vscode-ext/package.json
@@ -0,0 +1,33 @@
 {
    "name": "midas",
    "version": "0.1.0",
    "engines": {
        "vscode": "*"
    },
    "categories": ["Programming Languages"],
    "contributes": {
        "languages": [
            {
                "id": "midas",
                "extensions": [
                    ".mpy",
                    ".midas"
                ],
                "aliases": [
                    "Midas"
                ],
                "configuration": "./language-configurations.json"
            }
        ],
        "grammars": [
            {
                "language": "midas",
                "scopeName": "source.midas",
                "path": "./syntaxes/midas.tmLanguage.json",
                "embeddedLanguages": {
                    "meta.embedded.block.python": "python"
                }
            }
        ]
    }
 }
--- a/vscode-ext/syntaxes/midas.tmLanguage.json
+++ b/vscode-ext/syntaxes/midas.tmLanguage.json
@@ -0,0 +1,135 @@
 {
    "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json",
    "name": "Midas",
    "scopeName": "source.midas",
    "patterns": [{ "include": "#statement" }],
    "repository": {
        "comment": {
            "begin": "(//)",
            "end": "($)",
            "name": "comment.line",
            "beginCaptures": {
                "1": {
                    "name": "comment.line.double-slash"
                }
            }
        },
        "type-def": {
            "begin": "\\b(type)\\s+([a-zA-Z_][a-zA-Z_\\d]*)",
            "end": "$",
            "beginCaptures": {
                "1": {
                    "name": "keyword.control.type.midas"
                },
                "2": {
                    "name" : "variable.name"
                }
            },
            "patterns": [
                { "include": "#type-base" },
                { "include": "#type-body" }
            ]
        },
        "type-base": {
            "begin": "<",
            "end": ">",
            "beginCaptures": {
                "0": {
                    "name": "punctuation.definition.base.begin.midas"
                }
            },
            "endCaptures": {
                "0": {
                    "name": "punctuation.definition.base.end.midas"
                }
            },
            "patterns": [
                {"include": "source.python"}
            ]
        },
        "type-body": {
            "begin": "\\{",
            "end": "\\}",
            "beginCaptures": {
                "0": {
                    "name": "punctuation.definition.type-body.begin.midas"
                }
            },
            "endCaptures": {
                "0": {
                    "name": "punctuation.definition.type-body.end.midas"
                }
            },
            "patterns": [
                {"include": "#type-prop"}
            ]
        },
        "type-prop": {
            "match": "([a-zA-Z_][a-zA-Z_\\d]*)(:)\\s*([a-zA-Z_][a-zA-Z_\\d]*)",
            "captures": {
                "1": {
                    "name": "variable.name"
                },
                "2": {
                    "name": "punctuation.separator.annotation.midas"
                },
                "3": {
                    "name": "meta.type.name"
                }
            }
        },
        "op-def": {
            "match": "\\b(op)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>\\s+(\\S+)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>\\s+(=)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>",
            "captures": {
                "1": {
                    "name": "keyword.control.op.midas"
                },
                "2": {
                    "name" : "variable.name"
                },
                "3": {
                    "name" : "keyword.operator"
                },
                "4": {
                    "name" : "variable.name"
                },
                "5": {
                    "name" : "keyword.operator.assignment"
                },
                "6": {
                    "name" : "variable.name"
                }
            },
            "patterns": [
                { "include": "#type-base" },
                { "include": "#type-body" }
            ]
        },
        "constr-def": {
            "begin": "(constraint)\\s+([a-zA-Z_][a-zA-Z_\\d]*)\\s*(=)",
            "end": "$",
            "beginCaptures": {
                "1": {
                    "name": "keyword.control.constr.midas"
                },
                "2": {
                    "name": "variable.name"
                },
                "3": {
                    "name": "keyword.operator.assignment"
                }
            },
            "patterns": [
                { "include": "source.python" }
            ]
        },
        "statement": {
            "patterns": [
                { "include": "#comment" },
                { "include": "#type-def" },
                { "include": "#op-def" },
                { "include": "#constr-def" }
            ]
        }
    }
 }