fix(parser): handle extra tokens in Midas parser

tests(parser): add tests for midas parser
tests(parser): add tests for annotation parser
2026-05-18 18:43:35 +02:00 · 2026-05-18 18:43:03 +02:00 · 2026-05-18 14:42:27 +02:00 · 2026-05-18 13:43:12 +02:00 · 2026-05-18 13:35:23 +02:00 · 2026-05-18 13:22:11 +02:00
29 changed files with 2643 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -0,0 +1,6 @@
+.vscode
+__pycache__
+.env
+venv
+.venv
+*.pyc
--- a/core/ast/annotations.py
+++ b/core/ast/annotations.py
@@ -0,0 +1,107 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Generic, Optional, TypeVar
+
+from lexer.token import Token
+
+T = TypeVar("T")
+
+
+@dataclass(frozen=True)
+class Stmt(ABC):
+    """Abstract base class of the annotation statement nodes.
+
+    Concrete statements implement `accept` by calling the `visit_*` method of
+    the given visitor that corresponds to their own class.
+    """
+
+    @abstractmethod
+    def accept(self, visitor: Visitor[T]) -> T: ...
+
+    class Visitor(ABC, Generic[T]):
+        """Visitor interface over statement nodes; every visit returns a `T`."""
+
+        @abstractmethod
+        def visit_annotation_stmt(self, stmt: AnnotationStmt) -> T: ...
+
+
+@dataclass(frozen=True)
+class AnnotationStmt(Stmt):
+    """Statement node made of a name token and an optional schema expression."""
+
+    name: Token
+    # None when no schema is attached to the name
+    schema: Optional[SchemaExpr]
+
+    def accept(self, visitor: Stmt.Visitor[T]) -> T:
+        return visitor.visit_annotation_stmt(self)
+
+
+@dataclass(frozen=True)
+class Expr(ABC):
+    """Abstract base class of the annotation expression nodes.
+
+    Concrete expressions implement `accept` by calling the `visit_*` method of
+    the given visitor that corresponds to their own class.
+    """
+
+    @abstractmethod
+    def accept(self, visitor: Visitor[T]) -> T: ...
+
+    class Visitor(ABC, Generic[T]):
+        """Visitor interface over expression nodes; every visit returns a `T`."""
+
+        @abstractmethod
+        def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ...
+
+        @abstractmethod
+        def visit_literal_expr(self, expr: LiteralExpr) -> T: ...
+
+        @abstractmethod
+        def visit_type_expr(self, expr: TypeExpr) -> T: ...
+
+        @abstractmethod
+        def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ...
+
+        @abstractmethod
+        def visit_schema_expr(self, expr: SchemaExpr) -> T: ...
+
+        @abstractmethod
+        def visit_schema_element_expr(self, expr: SchemaElementExpr) -> T: ...
+
+
+@dataclass(frozen=True)
+class WildcardExpr(Expr):
+    """Wildcard expression node; keeps the token it was built from."""
+
+    token: Token
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_wildcard_expr(self)
+
+
+@dataclass(frozen=True)
+class LiteralExpr(Expr):
+    """Literal expression node holding an arbitrary Python value."""
+
+    value: Any
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_literal_expr(self)
+
+
+@dataclass(frozen=True)
+class TypeExpr(Expr):
+    """Type expression node: a type name with a list of attached constraints."""
+
+    name: Token
+    # May be empty; `frozen=True` only blocks reassignment, the list stays mutable
+    constraints: list[ConstraintExpr]
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_type_expr(self)
+
+
+@dataclass(frozen=True)
+class ConstraintExpr(Expr):
+    """Binary constraint node: two operand expressions around an operator token."""
+
+    left: Expr
+    op: Token
+    right: Expr
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_constraint_expr(self)
+
+
+@dataclass(frozen=True)
+class SchemaExpr(Expr):
+    """Schema node: a list of element expressions between two delimiter tokens."""
+
+    # Opening delimiter token (presumably the left bracket; confirm in the parser)
+    left: Token
+    elements: list[Expr]
+    # Closing delimiter token
+    right: Token
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_schema_expr(self)
+
+
+@dataclass(frozen=True)
+class SchemaElementExpr(Expr):
+    """Schema element node; both its name and its type are optional."""
+
+    # None for an unnamed element
+    name: Optional[Token]
+    # None for an untyped element
+    type: Optional[Expr]
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_schema_element_expr(self)
--- a/core/ast/midas.py
+++ b/core/ast/midas.py
@@ -0,0 +1,138 @@
+from __future__ import annotations
+
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Any, Generic, Optional, TypeVar
+
+from lexer.token import Token
+
+T = TypeVar("T")
+
+
+# Statements
+
+
+@dataclass(frozen=True)
+class Stmt(ABC):
+    """Abstract base class of the Midas statement nodes.
+
+    Concrete statements implement `accept` by calling the `visit_*` method of
+    the given visitor that corresponds to their own class.
+    """
+
+    @abstractmethod
+    def accept(self, visitor: Visitor[T]) -> T: ...
+
+    class Visitor(ABC, Generic[T]):
+        """Visitor interface over statement nodes; every visit returns a `T`."""
+
+        @abstractmethod
+        def visit_type_stmt(self, stmt: TypeStmt) -> T: ...
+
+        @abstractmethod
+        def visit_property_stmt(self, stmt: PropertyStmt) -> T: ...
+
+        @abstractmethod
+        def visit_op_stmt(self, stmt: OpStmt) -> T: ...
+
+        @abstractmethod
+        def visit_constraint_stmt(self, stmt: ConstraintStmt) -> T: ...
+
+
+@dataclass(frozen=True)
+class TypeStmt(Stmt):
+    """Type definition statement: a name, its base types and an optional body."""
+
+    name: Token
+    bases: list[TypeExpr]
+    # None when the definition has no `{ ... }` body
+    body: Optional[TypeBodyExpr]
+
+    def accept(self, visitor: Stmt.Visitor[T]) -> T:
+        return visitor.visit_type_stmt(self)
+
+
+@dataclass(frozen=True)
+class PropertyStmt(Stmt):
+    """Property statement: a property name and its type expression."""
+
+    name: Token
+    type: TypeExpr
+
+    def accept(self, visitor: Stmt.Visitor[T]) -> T:
+        return visitor.visit_property_stmt(self)
+
+
+@dataclass(frozen=True)
+class OpStmt(Stmt):
+    """Operation statement: two operand types, an operator and a result type."""
+
+    left: TypeExpr
+    op: Token
+    right: TypeExpr
+    result: TypeExpr
+
+    def accept(self, visitor: Stmt.Visitor[T]) -> T:
+        return visitor.visit_op_stmt(self)
+
+
+@dataclass(frozen=True)
+class ConstraintStmt(Stmt):
+    """Constraint statement: binds a name to a constraint expression."""
+
+    name: Token
+    constraint: ConstraintExpr
+
+    def accept(self, visitor: Stmt.Visitor[T]) -> T:
+        return visitor.visit_constraint_stmt(self)
+
+
+# Expressions
+
+
+@dataclass(frozen=True)
+class Expr(ABC):
+    """Abstract base class of the Midas expression nodes.
+
+    Concrete expressions implement `accept` by calling the `visit_*` method of
+    the given visitor that corresponds to their own class.
+    """
+
+    @abstractmethod
+    def accept(self, visitor: Visitor[T]) -> T: ...
+
+    class Visitor(ABC, Generic[T]):
+        """Visitor interface over expression nodes; every visit returns a `T`."""
+
+        @abstractmethod
+        def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ...
+
+        @abstractmethod
+        def visit_literal_expr(self, expr: LiteralExpr) -> T: ...
+
+        @abstractmethod
+        def visit_type_expr(self, expr: TypeExpr) -> T: ...
+
+        @abstractmethod
+        def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ...
+
+        @abstractmethod
+        def visit_type_body_expr(self, expr: TypeBodyExpr) -> T: ...
+
+
+@dataclass(frozen=True)
+class WildcardExpr(Expr):
+    """Wildcard expression node; keeps the token it was built from."""
+
+    token: Token
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_wildcard_expr(self)
+
+
+@dataclass(frozen=True)
+class LiteralExpr(Expr):
+    """Literal expression node holding an arbitrary Python value."""
+
+    value: Any
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_literal_expr(self)
+
+
+@dataclass(frozen=True)
+class TypeExpr(Expr):
+    """Type expression node: a type name with a list of attached constraints."""
+
+    name: Token
+    # May be empty; `frozen=True` only blocks reassignment, the list stays mutable
+    constraints: list[ConstraintExpr]
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_type_expr(self)
+
+
+@dataclass(frozen=True)
+class ConstraintExpr(Expr):
+    """Binary constraint node: two operand expressions around an operator token."""
+
+    left: Expr
+    op: Token
+    right: Expr
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_constraint_expr(self)
+
+
+@dataclass(frozen=True)
+class TypeBodyExpr(Expr):
+    """Body of a type definition: the list of its property statements."""
+
+    properties: list[PropertyStmt]
+
+    def accept(self, visitor: Expr.Visitor[T]) -> T:
+        return visitor.visit_type_body_expr(self)
--- a/core/ast/printer.py
+++ b/core/ast/printer.py
@@ -0,0 +1,360 @@
+from __future__ import annotations
+
+from contextlib import contextmanager
+from enum import Enum, auto
+import io
+from typing import Generator, Generic, Optional, Protocol, TypeVar
+
+import core.ast.annotations as a
+import core.ast.midas as m
+
+
+class _Level(Enum):
+    """State of one nesting level of the tree being printed."""
+
+    # The last child of this level was already written: draw blank padding
+    EMPTY = auto()
+    # More children may follow: draw a branch / vertical guide
+    ACTIVE = auto()
+    # The next line written at this level is its last child
+    LAST = auto()
+
+
+class Expr(Protocol):
+    """Structural type of anything that can be handed to `AstPrinter.print`."""
+
+    def accept(self, printer: AstPrinter) -> None: ...
+
+
+T = TypeVar("T", bound=Expr)
+
+
+class AstPrinter(Generic[T]):
+    """Shared machinery for printers that render an AST as an indented tree.
+
+    The printer keeps a stack of `_Level` states, one per nesting depth, from
+    which the box-drawing prefix of every line is derived. Subclasses provide
+    the `visit_*` methods and emit lines through `_write_line`.
+    """
+
+    LAST_CHILD = "└── "
+    CHILD = "├── "
+    VERTICAL = "│   "
+    EMPTY = "    "
+
+    def __init__(self):
+        self._levels: list[_Level] = []
+        # Index shown as `[i]` in front of the next written line, if any
+        self._idx: Optional[int] = None
+        self._buf: io.StringIO = io.StringIO()
+
+    def print(self, expr: T):
+        """Render the given node as a tree and return the resulting text"""
+        self._buf = io.StringIO()
+        expr.accept(self)
+        return self._buf.getvalue()
+
+    @contextmanager
+    def _child_level(self, last: bool = False) -> Generator[None, None, None]:
+        """Open a nesting level for the duration of the `with` block
+
+        Args:
+            last (bool, optional): whether the first line written inside the
+                level is already its last child. Defaults to False.
+        """
+        initial_state: _Level = _Level.LAST if last else _Level.ACTIVE
+        self._levels.append(initial_state)
+        try:
+            yield
+        finally:
+            # Always unwind, even when a visit raises
+            self._levels.pop()
+
+    def _mark_last(self):
+        """Flag the next line of the innermost level as its last child"""
+        if not self._levels:
+            return
+        self._levels[-1] = _Level.LAST
+
+    def _write_line(self, text: str, *, last: bool = False):
+        """Write one line of the tree, prefixed with the current indentation
+
+        A pending index (`self._idx`) is consumed and shown as `[i] ` in front
+        of the text.
+        """
+        if last:
+            self._mark_last()
+        prefix: str = self._build_indent()
+        pending, self._idx = self._idx, None
+        label: str = text if pending is None else f"[{pending}] {text}"
+        self._buf.write(prefix + label + "\n")
+
+    def _build_indent(self) -> str:
+        """Build the prefix of the next line from the level stack
+
+        A level in the `LAST` state is switched to `EMPTY` once its connector
+        has been produced, so that deeper lines get blank padding under it.
+        """
+        if not self._levels:
+            return ""
+        *outer, innermost = self._levels
+        pieces: list[str] = [
+            self.EMPTY if state == _Level.EMPTY else self.VERTICAL for state in outer
+        ]
+        if innermost == _Level.LAST:
+            pieces.append(self.LAST_CHILD)
+            self._levels[-1] = _Level.EMPTY
+        else:
+            pieces.append(self.CHILD)
+        return "".join(pieces)
+
+    def _write_optional_child(
+        self, label: str, child: Optional[T], *, last: bool = False
+    ):
+        """Write a labelled child, or `label: None` when it is missing"""
+        if last:
+            self._mark_last()
+        if child is None:
+            self._write_line(f"{label}: None")
+            return
+        self._write_line(label)
+        with self._child_level(last=True):
+            child.accept(self)
+
+
+class AnnotationAstPrinter(AstPrinter, a.Expr.Visitor[None], a.Stmt.Visitor[None]):
+    """Tree printer for the nodes of the annotation AST"""
+
+    def visit_annotation_stmt(self, stmt: a.AnnotationStmt) -> None:
+        self._write_line("AnnotationStmt")
+        with self._child_level():
+            self._write_line(f'name: "{stmt.name.lexeme}"')
+            self._write_optional_child("schema", stmt.schema, last=True)
+
+    def visit_type_expr(self, expr: a.TypeExpr):
+        final: int = len(expr.constraints) - 1
+        self._write_line("TypeExpr")
+        with self._child_level():
+            self._write_line(f'name: "{expr.name.lexeme}"')
+            self._write_line("constraints", last=True)
+            with self._child_level():
+                for position, node in enumerate(expr.constraints):
+                    # The index is printed in front of the child's first line
+                    self._idx = position
+                    if position == final:
+                        self._mark_last()
+                    node.accept(self)
+
+    def visit_constraint_expr(self, expr: a.ConstraintExpr) -> None:
+        self._write_line("ConstraintExpr")
+        with self._child_level():
+            # Each operand is the only (hence last) child of its label
+            self._write_line("left")
+            with self._child_level(last=True):
+                expr.left.accept(self)
+
+            self._write_line(f"operator: {expr.op.lexeme}")
+
+            self._write_line("right", last=True)
+            with self._child_level(last=True):
+                expr.right.accept(self)
+
+    def visit_schema_expr(self, expr: a.SchemaExpr):
+        final: int = len(expr.elements) - 1
+        self._write_line("SchemaExpr")
+        with self._child_level():
+            for position, element in enumerate(expr.elements):
+                self._idx = position
+                if position == final:
+                    self._mark_last()
+                element.accept(self)
+
+    def visit_schema_element_expr(self, expr: a.SchemaElementExpr):
+        self._write_line("SchemaElementExpr")
+        with self._child_level():
+            if expr.name is None:
+                shown_name: str = "None"
+            else:
+                shown_name = f'"{expr.name.lexeme}"'
+            self._write_line(f"name: {shown_name}")
+            self._write_optional_child("type", expr.type, last=True)
+
+    def visit_wildcard_expr(self, expr: a.WildcardExpr) -> None:
+        self._write_line("WildcardExpr")
+
+    def visit_literal_expr(self, expr: a.LiteralExpr) -> None:
+        self._write_line("LiteralExpr")
+        with self._child_level(last=True):
+            self._write_line(f"value: {expr.value}")
+
+
+class AnnotationPrinter(a.Expr.Visitor[str], a.Stmt.Visitor[str]):
+    """Render nodes of the annotation AST back to their textual form"""
+
+    def print(self, expr: a.Expr | a.Stmt):
+        """Return the textual form of the given node"""
+        return expr.accept(self)
+
+    def visit_annotation_stmt(self, stmt: a.AnnotationStmt) -> str:
+        schema: str = "" if stmt.schema is None else stmt.schema.accept(self)
+        return f"{stmt.name.lexeme}{schema}"
+
+    def visit_type_expr(self, expr: a.TypeExpr) -> str:
+        # Type name first, then every constraint wrapped in parentheses
+        wrapped: list[str] = ["(" + c.accept(self) + ")" for c in expr.constraints]
+        return " + ".join([expr.name.lexeme] + wrapped)
+
+    def visit_constraint_expr(self, expr: a.ConstraintExpr) -> str:
+        lhs: str = expr.left.accept(self)
+        operator: str = expr.op.lexeme
+        rhs: str = expr.right.accept(self)
+        return " ".join([lhs, operator, rhs])
+
+    def visit_schema_expr(self, expr: a.SchemaExpr) -> str:
+        inner: str = ", ".join(elmt.accept(self) for elmt in expr.elements)
+        return expr.left.lexeme + inner + expr.right.lexeme
+
+    def visit_schema_element_expr(self, expr: a.SchemaElementExpr) -> str:
+        # A missing type is rendered as the wildcard
+        type_text: str = "_" if expr.type is None else expr.type.accept(self)
+        if expr.name is None:
+            return type_text
+        return ": ".join([expr.name.lexeme, type_text])
+
+    def visit_wildcard_expr(self, expr: a.WildcardExpr) -> str:
+        return "_"
+
+    def visit_literal_expr(self, expr: a.LiteralExpr) -> str:
+        return str(expr.value)
+
+
+class MidasAstPrinter(AstPrinter, m.Expr.Visitor[None], m.Stmt.Visitor[None]):
+    """Tree printer for the nodes of the Midas AST"""
+
+    def visit_type_stmt(self, stmt: m.TypeStmt):
+        final: int = len(stmt.bases) - 1
+        self._write_line("TypeStmt")
+        with self._child_level():
+            self._write_line(f'name: "{stmt.name.lexeme}"')
+            self._write_line("bases")
+            with self._child_level():
+                for position, base in enumerate(stmt.bases):
+                    # The index is printed in front of the child's first line
+                    self._idx = position
+                    if position == final:
+                        self._mark_last()
+                    base.accept(self)
+            self._write_optional_child("body", stmt.body, last=True)
+
+    def visit_property_stmt(self, stmt: m.PropertyStmt):
+        self._write_line("PropertyStmt")
+        with self._child_level():
+            self._write_line(f'name: "{stmt.name.lexeme}"')
+            self._write_line("type", last=True)
+            # The type is the only (hence last) child of its label
+            with self._child_level(last=True):
+                stmt.type.accept(self)
+
+    def visit_op_stmt(self, stmt: m.OpStmt) -> None:
+        self._write_line("OpStmt")
+        with self._child_level():
+            self._write_line("left")
+            with self._child_level(last=True):
+                stmt.left.accept(self)
+
+            self._write_line(f'op: "{stmt.op.lexeme}"')
+
+            self._write_line("right")
+            with self._child_level(last=True):
+                stmt.right.accept(self)
+
+            self._write_line("result", last=True)
+            with self._child_level(last=True):
+                stmt.result.accept(self)
+
+    def visit_constraint_stmt(self, stmt: m.ConstraintStmt):
+        self._write_line("ConstraintStmt")
+        with self._child_level():
+            self._write_line(f'name: "{stmt.name.lexeme}"')
+            self._write_line("constraint", last=True)
+            with self._child_level(last=True):
+                stmt.constraint.accept(self)
+
+    def visit_type_expr(self, expr: m.TypeExpr):
+        final: int = len(expr.constraints) - 1
+        self._write_line("TypeExpr")
+        with self._child_level():
+            self._write_line(f'name: "{expr.name.lexeme}"')
+            self._write_line("constraints", last=True)
+            with self._child_level():
+                for position, node in enumerate(expr.constraints):
+                    self._idx = position
+                    if position == final:
+                        self._mark_last()
+                    node.accept(self)
+
+    def visit_constraint_expr(self, expr: m.ConstraintExpr):
+        self._write_line("ConstraintExpr")
+        with self._child_level():
+            self._write_line("left")
+            with self._child_level(last=True):
+                expr.left.accept(self)
+
+            self._write_line(f"operator: {expr.op.lexeme}")
+
+            self._write_line("right", last=True)
+            with self._child_level(last=True):
+                expr.right.accept(self)
+
+    def visit_type_body_expr(self, expr: m.TypeBodyExpr):
+        final: int = len(expr.properties) - 1
+        self._write_line("TypeBodyExpr")
+        with self._child_level():
+            self._write_line("properties", last=True)
+            with self._child_level():
+                for position, prop in enumerate(expr.properties):
+                    self._idx = position
+                    if position == final:
+                        self._mark_last()
+                    prop.accept(self)
+
+    def visit_wildcard_expr(self, expr: m.WildcardExpr) -> None:
+        self._write_line("WildcardExpr")
+
+    def visit_literal_expr(self, expr: m.LiteralExpr) -> None:
+        self._write_line("LiteralExpr")
+        with self._child_level(last=True):
+            self._write_line(f"value: {expr.value}")
+
+
+class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]):
+    """Render nodes of the Midas AST back to Midas source text"""
+
+    def __init__(self, indent: int = 4):
+        """Create a new printer
+
+        Args:
+            indent (int, optional): number of spaces per nesting level. Defaults to 4.
+        """
+        self.indent: int = indent
+        self.level: int = 0
+
+    def indented(self, text: str) -> str:
+        """Prefix the text with the padding of the current nesting level"""
+        padding: str = " " * (self.level * self.indent)
+        return padding + text
+
+    def print(self, expr: m.Expr | m.Stmt):
+        """Return the source text of the given node, starting at level 0"""
+        self.level = 0
+        return expr.accept(self)
+
+    def visit_type_stmt(self, stmt: m.TypeStmt):
+        base_list: str = ", ".join(base.accept(self) for base in stmt.bases)
+        header: str = self.indented(f"type {stmt.name.lexeme}<{base_list}>")
+        if stmt.body is None:
+            return header
+
+        # The body is rendered one level deeper than the header and the brace
+        self.level += 1
+        body: str = stmt.body.accept(self)
+        self.level -= 1
+        return header + " {\n" + body + "\n" + self.indented("}")
+
+    def visit_property_stmt(self, stmt: m.PropertyStmt):
+        return f"{stmt.name.lexeme}: {stmt.type.accept(self)}"
+
+    def visit_op_stmt(self, stmt: m.OpStmt):
+        lhs: str = stmt.left.accept(self)
+        symbol: str = stmt.op.lexeme
+        rhs: str = stmt.right.accept(self)
+        outcome: str = stmt.result.accept(self)
+        return self.indented(f"op <{lhs}> {symbol} <{rhs}> = <{outcome}>")
+
+    def visit_constraint_stmt(self, stmt: m.ConstraintStmt):
+        body: str = stmt.constraint.accept(self)
+        return self.indented(f"constraint {stmt.name.lexeme} = {body}")
+
+    def visit_type_expr(self, expr: m.TypeExpr):
+        # Type name first, then every constraint wrapped in parentheses
+        wrapped: list[str] = ["(" + c.accept(self) + ")" for c in expr.constraints]
+        return " + ".join([expr.name.lexeme] + wrapped)
+
+    def visit_constraint_expr(self, expr: m.ConstraintExpr):
+        lhs: str = expr.left.accept(self)
+        operator: str = expr.op.lexeme
+        rhs: str = expr.right.accept(self)
+        return " ".join([lhs, operator, rhs])
+
+    def visit_type_body_expr(self, expr: m.TypeBodyExpr):
+        return "\n".join(self.indented(prop.accept(self)) for prop in expr.properties)
+
+    def visit_wildcard_expr(self, expr: m.WildcardExpr):
+        return "_"
+
+    def visit_literal_expr(self, expr: m.LiteralExpr):
+        return str(expr.value)
--- a/examples/00_syntax_prototype/01_simple_types.py
+++ b/examples/00_syntax_prototype/01_simple_types.py
@@ -0,0 +1,16 @@
+# type: ignore
+# ruff: disable[F821]
+from __future__ import annotations
+
+# A simple data-frame with several columns of various simple types
+# Columns can be named and/or typed
+df: Frame[
+    verified: bool,
+    birth_year: int,
+    height: float + ( _ > 0 ) + ( _ < 250 ),
+    name: str,
+    date: datetime,
+    float,  # unnamed
+    unknown: _,  # untyped
+    _  # unnamed and untyped
+]
--- a/examples/00_syntax_prototype/02_custom_types.midas
+++ b/examples/00_syntax_prototype/02_custom_types.midas
@@ -0,0 +1,24 @@
+// Simple custom type derived from floats
+type Latitude<float>
+type Longitude<float>
+
+// Complex custom type, containing two values accessible through properties
+type GeoLocation<Latitude, Longitude> {
+    lat: Latitude
+    lon: Longitude
+}
+
+type LatitudeDiff<float>
+type LongitudeDiff<float>
+
+// Simple operation defined on our custom types
+op <Latitude> - <Latitude> = <LatitudeDiff>
+op <Longitude> - <Longitude> = <LongitudeDiff>
+
+// Simple custom type with a constraint
+type Age<int + (0 <= _) + (_ < 150)>
+
+// Predefined custom constraints that can be referenced in other definitions
+constraint Positive = _ >= 0
+constraint StrictlyPositive = _ > 0
+//constraint Even = _ % 2 == 0
--- a/examples/00_syntax_prototype/02_custom_types.py
+++ b/examples/00_syntax_prototype/02_custom_types.py
@@ -0,0 +1,34 @@
+# type: ignore
+# ruff: disable[F821]
+from __future__ import annotations
+
+# Prototype of custom type import to use valid Python syntax
+import midas
+midas.using("02_custom_types.midas")
+
+# A data-frame using a custom type
+df: Frame[
+    location: GeoLocation
+]
+
+# Properties of a type can be used on a column of that type; the resulting
+# column holds the values of the property, hence the property's own type
+lat: Column[Latitude] = df["location"].lat
+lon: Column[Longitude] = df["location"].lon
+
+# Unregistered operations between types are not permitted
+lat + lon  # Invalid operation
+
+# Registered operations are permitted
+lat1: Latitude = lat[0]
+lat2: Latitude = lat[1]
+lat_diff: LatitudeDiff = lat2 - lat1  # Valid operation
+
+# In addition to the type, a column can have one or more constraints, either defined inline or in a separate file
+df2: Frame[
+    age: int + (_ >= 0),
+    height: float + (_ >= 0),
+]
+df2_bis: Frame[
+    age: int + Positive,
+    height: float + Positive,
+]
--- a/lexer/__init__.py
+++ b/lexer/__init__.py
--- a/lexer/annotations.py
+++ b/lexer/annotations.py
@@ -0,0 +1,102 @@
+from lexer.base import Lexer
+from lexer.keyword import ANNOTATION_KEYWORDS
+from lexer.token import TokenType
+
+
+class AnnotationLexer(Lexer):
+    """Lexer for the annotation syntax
+
+    Runs of whitespace, newlines and `#` comments are emitted as tokens of their
+    own rather than being dropped.
+    """
+
+    def scan_token(self) -> None:
+        """Consume one lexeme and add the matching token
+
+        Raises:
+            SyntaxError: on a lone `!` or on a character that starts no token
+        """
+        char: str = self.advance()
+        match char:
+            case "(":
+                self.add_token(TokenType.LEFT_PAREN)
+            case ")":
+                self.add_token(TokenType.RIGHT_PAREN)
+            case "[":
+                self.add_token(TokenType.LEFT_BRACKET)
+            case "]":
+                self.add_token(TokenType.RIGHT_BRACKET)
+            case "<":
+                self.add_token(
+                    TokenType.LESS_EQUAL if self.match("=") else TokenType.LESS
+                )
+            case ">":
+                self.add_token(
+                    TokenType.GREATER_EQUAL if self.match("=") else TokenType.GREATER
+                )
+            case "=":
+                self.add_token(
+                    TokenType.EQUAL_EQUAL if self.match("=") else TokenType.EQUAL
+                )
+            case "!":
+                if self.match("="):
+                    self.add_token(TokenType.BANG_EQUAL)
+                else:
+                    self.error("Unexpected single bang. Did you mean '!=' ?")
+            case ":":
+                self.add_token(TokenType.COLON)
+            case ",":
+                self.add_token(TokenType.COMMA)
+            case "_":
+                self.add_token(TokenType.UNDERSCORE)
+            case "+":
+                self.add_token(TokenType.PLUS)
+            case "#":
+                self.scan_comment()
+            case "\n":
+                self.add_token(TokenType.NEWLINE)
+            case " " | "\r" | "\t":
+                # Consume all whitespace characters until EOL or EOF
+                while (
+                    self.peek().isspace()
+                    and self.peek() != "\n"
+                    and not self.is_at_end()
+                ):
+                    self.advance()
+                self.add_token(TokenType.WHITESPACE)
+            case _:
+                # `isdecimal` rather than `isdigit`: characters such as "²" are
+                # digits for `isdigit` but are rejected by `float`, which would
+                # escape as a ValueError instead of a lexer SyntaxError
+                if char.isdecimal():
+                    self.scan_number()
+                elif char.isalpha():
+                    self.scan_identifier()
+                else:
+                    self.error("Unexpected character")
+        return None
+
+    def scan_number(self):
+        """Scan the rest of number and add it as a token
+
+        This method handles both simple integers and floats. Scientific notation
+        and base prefixes (0x, 0b, 0o) are not supported. The token value is
+        always a `float`, including for integer lexemes
+        """
+        while self.peek().isdecimal():
+            self.advance()
+
+        # A dot only belongs to the number when a digit follows it
+        if self.peek() == "." and self.peek_next().isdecimal():
+            self.advance()
+            while self.peek().isdecimal():
+                self.advance()
+
+        value: float = float(self.source[self.start : self.idx])
+        self.add_token(TokenType.NUMBER, value)
+
+    def scan_identifier(self):
+        """Scan the rest of an identifier and add it as a token
+
+        An identifier starts with a letter, followed by any number of
+        alphanumerical characters or underscores. Lexemes listed in
+        `ANNOTATION_KEYWORDS` get the keyword's token type instead of IDENTIFIER
+        """
+        while self.peek().isalnum() or self.peek() == "_":
+            self.advance()
+
+        lexeme: str = self.source[self.start : self.idx]
+        token_type: TokenType = ANNOTATION_KEYWORDS.get(lexeme, TokenType.IDENTIFIER)
+        self.add_token(token_type)
+
+    def scan_comment(self):
+        """Scan the rest of a comment and add it as a token
+
+        A comment starts with a `#` character and ends at the EOL/EOF. The
+        newline itself is not part of the comment token
+        """
+        while self.peek() != "\n" and not self.is_at_end():
+            self.advance()
+        self.add_token(TokenType.COMMENT)
--- a/lexer/base.py
+++ b/lexer/base.py
@@ -0,0 +1,166 @@
+from abc import ABC, abstractmethod
+from typing import Any, Callable, Optional
+
+from lexer.position import Position
+from lexer.token import Token, TokenType
+
+
+class Lexer(ABC):
+    """Abstract base lexer offering the primitives needed to write a concrete one
+
+    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
+    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)
+
+    [1]: https://craftinginterpreters.com/
+    """
+
+    def __init__(self, source: str, file: Optional[str] = None) -> None:
+        """Set up a lexer over the given source text
+
+        Args:
+            source (str): the text to tokenize
+            file (Optional[str], optional): where the source comes from, either a file path or any other string identifier. Defaults to None.
+        """
+        self.source: str = source
+        self.file: Optional[str] = file
+        self.tokens: list[Token] = []
+        # `start` is the index of the first character of the lexeme being
+        # scanned, `idx` the index of the next character to consume
+        self.start: int = 0
+        self.idx: int = 0
+        self.length: int = len(self.source)
+        self.line: int = 1
+        self.column: int = 1
+        self.start_pos: Position = self.get_position()
+
+    def error(self, msg: str):
+        """Report a syntax error located at the start of the current lexeme
+
+        Args:
+            msg (str): description of the problem
+
+        Raises:
+            SyntaxError: always
+        """
+        raise SyntaxError(f"[ERROR] Error at {self.start_pos}: {msg}")
+
+    def process(self) -> list[Token]:
+        """Tokenize the whole source and terminate the list with an EOF token
+
+        Returns:
+            list[Token]: the scanned tokens, followed by the EOF token
+
+        Raises:
+            SyntaxError: if a syntax error is found
+        """
+        self.scan_tokens()
+        eof: Token = Token(TokenType.EOF, "", None, self.get_position())
+        self.tokens.append(eof)
+        return self.tokens
+
+    def is_at_end(self) -> bool:
+        """Tell whether every character of the source has been consumed
+
+        Returns:
+            bool: True once the current index reaches the length of the source
+        """
+        return not self.idx < self.length
+
+    def get_position(self) -> Position:
+        """Snapshot the current file, line and column
+
+        Returns:
+            Position: the current position
+        """
+        return Position(file=self.file, line=self.line, column=self.column)
+
+    def peek(self) -> str:
+        """Look at the current character without consuming it
+
+        Returns:
+            str: the current character, or an empty string if at EOF
+        """
+        return self.source[self.idx] if self.idx < self.length else ""
+
+    def peek_next(self) -> str:
+        """Look one character past the current one without consuming anything
+
+        Returns:
+            str: the next character, or an empty string if there is none
+        """
+        lookahead: int = self.idx + 1
+        return self.source[lookahead] if lookahead < self.length else ""
+
+    def advance(self) -> str:
+        """Consume the current character and move on to the next one
+
+        Returns:
+            str: the character that was consumed
+        """
+        consumed: str = self.peek()
+        self.idx += 1
+        self.column += 1
+        # A line break resets the column that was just incremented
+        if consumed == "\n":
+            self.newline()
+        return consumed
+
+    def newline(self):
+        """Move the current position to the first column of the next line"""
+        self.line += 1
+        self.column = 1
+
+    def match(self, expected: str) -> bool:
+        """Consume the current character only if it is the expected one
+
+        Args:
+            expected (str): the character to look for
+
+        Returns:
+            bool: whether the character was found and consumed
+        """
+        if self.peek() != expected:
+            return False
+        self.advance()
+        return True
+
+    def update_start(self):
+        """Start a new lexeme at the current position
+
+        Both the start index and the start position are moved to where the
+        lexer currently stands
+        """
+        self.start = self.idx
+        self.start_pos = self.get_position()
+
+    def add_token(self, token_type: TokenType, value: Optional[Any] = None):
+        """Turn the current lexeme into a token and store it
+
+        Args:
+            token_type (TokenType): the type of the new token
+            value (Optional[Any], optional): the value carried by the token (useful for numbers or constants). Defaults to None.
+        """
+        token: Token = Token(
+            position=self.start_pos,
+            type=token_type,
+            lexeme=self.source[self.start : self.idx],
+            value=value,
+        )
+        self.tokens.append(token)
+
+    def scan_tokens(self, condition: Optional[Callable[[], bool]] = None):
+        """Scan tokens until EOF is reached or the given condition becomes False
+
+        Args:
+            condition (Optional[Callable[[], bool]], optional): checked before each token, scanning stops once it returns False.
+                If None, tokens are scanned until EOF is reached. Defaults to None.
+        """
+        keep_going: Callable[[], bool] = (
+            condition if condition is not None else (lambda: True)
+        )
+        while keep_going() and not self.is_at_end():
+            self.update_start()
+            self.scan_token()
+
+    @abstractmethod
+    def scan_token(self) -> None:
+        """Scan a token
+
+        Implementations should (at least) consume the current character and produce the appropriate token(s), using `add_token`
+        """
+        pass
--- a/lexer/keyword.py
+++ b/lexer/keyword.py
@@ -0,0 +1,16 @@
+from lexer.token import TokenType
+
+# Reserved words of the annotation syntax, mapped to their token type.
+# Identifiers found in this table are emitted with the mapped type instead
+# of IDENTIFIER.
+ANNOTATION_KEYWORDS: dict[str, TokenType] = {
+    "True": TokenType.TRUE,
+    "False": TokenType.FALSE,
+    "None": TokenType.NONE,
+}
+
+# Reserved words of the Midas syntax, mapped to their token type.
+# Unlike the annotation table, the constants are spelled in lowercase.
+MIDAS_KEYWORDS: dict[str, TokenType] = {
+    "type": TokenType.TYPE,
+    "op": TokenType.OP,
+    "constraint": TokenType.CONSTRAINT,
+    "true": TokenType.TRUE,
+    "false": TokenType.FALSE,
+    "none": TokenType.NONE,
+}
--- a/lexer/midas.py
+++ b/lexer/midas.py
@@ -0,0 +1,131 @@
+from lexer.base import Lexer
+from lexer.keyword import MIDAS_KEYWORDS
+from lexer.token import TokenType
+
+
+class MidasLexer(Lexer):
+    """Lexer for Midas type definitions"""
+
+    def scan_token(self) -> None:
+        """Consume the current character and add the matching token(s)
+
+        Single-character tokens are added directly; two-character operators,
+        comments, whitespace runs, numbers and identifiers are completed
+        either inline or by the dedicated `scan_*` helpers
+        """
+        char: str = self.advance()
+        match char:
+            case "(":
+                self.add_token(TokenType.LEFT_PAREN)
+            case ")":
+                self.add_token(TokenType.RIGHT_PAREN)
+            case "[":
+                self.add_token(TokenType.LEFT_BRACKET)
+            case "]":
+                self.add_token(TokenType.RIGHT_BRACKET)
+            case "{":
+                self.add_token(TokenType.LEFT_BRACE)
+            case "}":
+                self.add_token(TokenType.RIGHT_BRACE)
+            case "<":
+                self.add_token(
+                    TokenType.LESS_EQUAL if self.match("=") else TokenType.LESS
+                )
+            case ">":
+                self.add_token(
+                    TokenType.GREATER_EQUAL if self.match("=") else TokenType.GREATER
+                )
+            case "=":
+                self.add_token(
+                    TokenType.EQUAL_EQUAL if self.match("=") else TokenType.EQUAL
+                )
+            case "!":
+                # A lone '!' is not part of the syntax, only '!=' is
+                if self.match("="):
+                    self.add_token(TokenType.BANG_EQUAL)
+                else:
+                    self.error("Unexpected single bang. Did you mean '!=' ?")
+            case ":":
+                self.add_token(TokenType.COLON)
+            case ",":
+                self.add_token(TokenType.COMMA)
+            case "_":
+                self.add_token(TokenType.UNDERSCORE)
+            case "+":
+                self.add_token(TokenType.PLUS)
+            case "-":
+                self.add_token(TokenType.MINUS)
+            case "*":
+                self.add_token(TokenType.STAR)
+            case "/":
+                # '/' either starts a comment ('//' or '/*') or is a plain slash
+                if self.match("/"):
+                    self.scan_comment()
+                elif self.match("*"):
+                    self.scan_comment_multiline()
+                else:
+                    self.add_token(TokenType.SLASH)
+            case "\n":
+                self.add_token(TokenType.NEWLINE)
+            case " " | "\r" | "\t":
+                # Consume all whitespace characters until EOL or EOF
+                while (
+                    self.peek().isspace()
+                    and self.peek() != "\n"
+                    and not self.is_at_end()
+                ):
+                    self.advance()
+                self.add_token(TokenType.WHITESPACE)
+            case _:
+                # `isdecimal` rather than `isdigit`: characters such as "²"
+                # satisfy `isdigit` but are rejected by `float()`, which
+                # would make `scan_number` raise a ValueError
+                if char.isdecimal():
+                    self.scan_number()
+                elif char.isalpha():
+                    self.scan_identifier()
+                else:
+                    self.error("Unexpected character")
+
+    def scan_number(self):
+        """Scan the rest of number and add it as a token
+
+        This method handles both simple integers and floats. Scientific notation
+        and base prefixes (0x, 0b, 0o) are not supported.
+        The token value is always stored as a float
+        """
+        # Only decimal digits are consumed, so the lexeme is always valid for `float()`
+        while self.peek().isdecimal():
+            self.advance()
+
+        # A dot only belongs to the number if it is followed by a digit
+        if self.peek() == "." and self.peek_next().isdecimal():
+            self.advance()
+            while self.peek().isdecimal():
+                self.advance()
+
+        value: float = float(self.source[self.start : self.idx])
+        self.add_token(TokenType.NUMBER, value)
+
+    def scan_identifier(self):
+        """Scan the rest of an identifier and add it as a token
+
+        An identifier starts with a letter, followed by any number of
+        alphanumerical characters or underscores. If the lexeme is a Midas
+        keyword, the keyword's token type is used instead of IDENTIFIER
+        """
+        while self.peek().isalnum() or self.peek() == "_":
+            self.advance()
+
+        lexeme: str = self.source[self.start : self.idx]
+        token_type: TokenType = MIDAS_KEYWORDS.get(lexeme, TokenType.IDENTIFIER)
+        self.add_token(token_type)
+
+    def scan_comment(self):
+        """Scan the rest of a comment and add it as a token
+
+        A comment starts with `//` and ends at the EOL/EOF.
+        The newline itself is not consumed
+        """
+        while self.peek() != "\n" and not self.is_at_end():
+            self.advance()
+        self.add_token(TokenType.COMMENT)
+
+    def scan_comment_multiline(self):
+        """Scan the rest of a multiline comment and add it as a token
+
+        A multiline comment starts with `/*` and ends with `*/` or at the EOF
+        (an unterminated comment is not reported as an error)
+        """
+        while (
+            not (self.peek() == "*" and self.peek_next() == "/")
+            and not self.is_at_end()
+        ):
+            self.advance()
+        # Consume the closing '*' and '/' if they are present
+        if not self.is_at_end():
+            self.advance()
+        if not self.is_at_end():
+            self.advance()
+        self.add_token(TokenType.COMMENT)
--- a/lexer/position.py
+++ b/lexer/position.py
@@ -0,0 +1,13 @@
+from dataclasses import dataclass
+from typing import Optional
+
+
+@dataclass(frozen=True)
+class Position:
+    """Immutable location of a token: an optional file name, a line and a column"""
+
+    file: Optional[str]
+    line: int
+    column: int
+
+    def __repr__(self):
+        # A missing (or empty) file name is simply omitted from the output
+        origin: str = self.file if self.file else ""
+        return "{}L{}:{}".format(origin, self.line, self.column)
--- a/lexer/token.py
+++ b/lexer/token.py
@@ -0,0 +1,59 @@
+from dataclasses import dataclass
+from enum import Enum, auto
+from typing import Any
+
+from lexer.position import Position
+
+
+class TokenType(Enum):
+    """All the kinds of tokens produced by the lexers
+
+    Values are assigned with `auto()`, so they depend on the declaration order
+    """
+
+    # Punctuation
+    LEFT_PAREN = auto()
+    RIGHT_PAREN = auto()
+    LEFT_BRACKET = auto()
+    RIGHT_BRACKET = auto()
+    LEFT_BRACE = auto()
+    RIGHT_BRACE = auto()
+    COLON = auto()
+    COMMA = auto()
+    UNDERSCORE = auto()
+
+    # Operators
+    PLUS = auto()
+    MINUS = auto()
+    STAR = auto()
+    SLASH = auto()
+    GREATER = auto()
+    GREATER_EQUAL = auto()
+    LESS = auto()
+    LESS_EQUAL = auto()
+    EQUAL = auto()
+    EQUAL_EQUAL = auto()
+    BANG_EQUAL = auto()
+
+    # Literals
+    IDENTIFIER = auto()
+    NUMBER = auto()
+    TRUE = auto()
+    FALSE = auto()
+    NONE = auto()
+
+    # Keywords (statement introducers of the Midas syntax)
+    TYPE = auto()
+    OP = auto()
+    CONSTRAINT = auto()
+
+    # Misc (COMMENT, WHITESPACE and NEWLINE are filtered out by the parsers)
+    COMMENT = auto()
+    WHITESPACE = auto()
+    EOF = auto()
+    NEWLINE = auto()
+
+
+@dataclass(frozen=True)
+class Token:
+    """A scanned token (immutable)"""
+
+    # The kind of token
+    type: TokenType
+    # The exact source text the token was scanned from
+    lexeme: str
+    # Optional payload, e.g. the numeric value of a NUMBER token
+    value: Any
+    # Where the token starts in the source
+    position: Position
--- a/parser/annotations.py
+++ b/parser/annotations.py
@@ -0,0 +1,152 @@
+from typing import Optional
+
+from core.ast.annotations import (
+    AnnotationStmt,
+    ConstraintExpr,
+    Expr,
+    LiteralExpr,
+    SchemaElementExpr,
+    SchemaExpr,
+    Stmt,
+    TypeExpr,
+    WildcardExpr,
+)
+from lexer.token import Token, TokenType
+from parser.base import Parser
+from parser.errors import ParsingError
+
+
+class AnnotationParser(Parser):
+    """Recursive-descent parser for custom type annotations (`Type` or `Type[Schema]`)"""
+
+    SYNC_BOUNDARY: set[TokenType] = set()
+
+    def parse(self) -> Optional[Stmt]:
+        """Parse a single annotation
+
+        Parsing errors are recorded in `self.errors` rather than raised.
+        Any token left after the annotation is reported as "Extra tokens"
+
+        Returns:
+            Optional[Stmt]: the parsed annotation, or None if a ParsingError occurred
+        """
+        result: Optional[Stmt]
+        try:
+            result = self.annotation()
+        except ParsingError:
+            result = None
+            self.synchronize()
+        if not self.is_at_end():
+            self.error(self.peek(), "Extra tokens")
+        return result
+
+    def synchronize(self):
+        """Skip tokens until a synchronization boundary (or EOF) is reached
+
+        At least one token is always skipped. Since `SYNC_BOUNDARY` is empty
+        for annotations, this effectively skips to the end of the tokens
+        """
+        self.advance()
+        while not (self.is_at_end() or self.peek().type in self.SYNC_BOUNDARY):
+            self.advance()
+
+    def annotation(self) -> AnnotationStmt:
+        """Parse an annotation, written as `Type` or `Type[Schema]`
+
+        Returns:
+            AnnotationStmt: the parsed annotation statement
+        """
+        type_name: Token = self.consume(
+            TokenType.IDENTIFIER, "Expected type identifier"
+        )
+        if not self.match(TokenType.LEFT_BRACKET):
+            return AnnotationStmt(name=type_name, schema=None)
+        return AnnotationStmt(name=type_name, schema=self.schema())
+
+    def type_expr(self) -> TypeExpr:
+        """Parse a type name followed by any number of `+ (constraint)` suffixes
+
+        Returns:
+            TypeExpr: the parsed type expression
+        """
+        base: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
+        attached: list[ConstraintExpr] = []
+
+        while True:
+            if self.is_at_end() or not self.match(TokenType.PLUS):
+                break
+            self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint")
+            attached.append(self.constraint_expr())
+            self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint")
+
+        return TypeExpr(name=base, constraints=attached)
+
+    def constraint_expr(self) -> ConstraintExpr:
+        """Parse a type constraint: a value, an operator, then another value
+
+        Returns:
+            ConstraintExpr: the parsed type constraint expression
+        """
+        # Keyword arguments are evaluated left to right, i.e. in source order
+        return ConstraintExpr(
+            left=self.constraint_value(),
+            op=self.constraint_operator(),
+            right=self.constraint_value(),
+        )
+
+    def constraint_value(self) -> Expr:
+        """Parse one side of a constraint: either the wildcard `_` or a literal"""
+        if not self.match(TokenType.UNDERSCORE):
+            return self.literal()
+        return WildcardExpr(self.previous())
+
+    def literal(self) -> LiteralExpr:
+        """Parse a boolean, None or number literal
+
+        Raises:
+            ParsingError: if the current token is not a literal
+        """
+        constants = (
+            (TokenType.FALSE, False),
+            (TokenType.TRUE, True),
+            (TokenType.NONE, None),
+        )
+        for token_type, constant in constants:
+            if self.match(token_type):
+                return LiteralExpr(constant)
+
+        if self.match(TokenType.NUMBER):
+            return LiteralExpr(self.previous().value)
+
+        raise self.error(self.peek(), "Expected literal")
+
+    def constraint_operator(self) -> Token:
+        """Parse a comparison operator
+
+        Raises:
+            ParsingError: if the current token is not a comparison operator
+        """
+        comparisons = (
+            TokenType.LESS,
+            TokenType.LESS_EQUAL,
+            TokenType.GREATER,
+            TokenType.GREATER_EQUAL,
+            TokenType.EQUAL_EQUAL,
+            TokenType.BANG_EQUAL,
+        )
+        if not self.match(*comparisons):
+            raise self.error(self.peek(), "Expected constraint operator")
+        return self.previous()
+
+    def schema(self) -> SchemaExpr:
+        """Parse a schema definition, i.e. a comma separated list of schema elements
+
+        The opening bracket is expected to have just been consumed
+
+        Returns:
+            SchemaExpr: the parsed schema expression
+        """
+        opening: Token = self.previous()
+        items: list[Expr] = []
+        while not (self.check(TokenType.RIGHT_BRACKET) or self.is_at_end()):
+            items.append(self.schema_element())
+            if not self.check(TokenType.RIGHT_BRACKET):
+                self.consume(TokenType.COMMA, "Expected ',' between schema elements")
+
+        closing: Token = self.consume(TokenType.RIGHT_BRACKET, "Unclosed schema")
+        return SchemaExpr(left=opening, elements=items, right=closing)
+
+    def schema_element(self) -> SchemaElementExpr:
+        """Parse a schema element
+
+        One of: an anonymous element (`_`), a type, an untyped named
+        column (`name: _`), or a named column (`name: Type`)
+
+        Returns:
+            SchemaElementExpr: the parsed schema element expression
+        """
+        if self.match(TokenType.UNDERSCORE):
+            return SchemaElementExpr(name=None, type=None)
+
+        if not self.check(TokenType.IDENTIFIER):
+            raise self.error(self.peek(), "Expected schema element")
+
+        column_name: Optional[Token] = None
+        if self.check_next(TokenType.COLON):
+            column_name = self.advance()
+            # Skip the ':' separating the name from the type
+            self.advance()
+
+        column_type: Optional[TypeExpr] = (
+            None if self.match(TokenType.UNDERSCORE) else self.type_expr()
+        )
+        return SchemaElementExpr(name=column_name, type=column_type)
--- a/parser/base.py
+++ b/parser/base.py
@@ -0,0 +1,183 @@
+from abc import ABC, abstractmethod
+from dataclasses import dataclass
+from typing import Generic, TypeVar
+
+from lexer.token import Token, TokenType
+from parser.errors import ParsingError
+
+
+@dataclass(frozen=True)
+class TokenError:
+    """A parsing error attached to the token at which it was detected"""
+
+    token: Token
+    message: str
+
+    def get_report(self) -> str:
+        """Build a human-readable error message
+
+        The EOF token is reported as "end", any other token by its quoted lexeme
+
+        Returns:
+            str: the complete error message, prefixed with the token position
+        """
+        if self.token.type == TokenType.EOF:
+            location: str = "end"
+        else:
+            location = f"'{self.token.lexeme}'"
+        return f"({self.token.position}) Error at {location}: {self.message}"
+
+
+T = TypeVar("T")
+
+
+class Parser(ABC, Generic[T]):
+    """Abstract base parser offering the token-navigation helpers used by concrete parsers
+
+    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
+    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)
+
+    [1]: https://craftinginterpreters.com/
+    """
+
+    IGNORE: set[TokenType] = {
+        TokenType.WHITESPACE,
+        TokenType.COMMENT,
+        TokenType.NEWLINE,
+    }
+
+    def __init__(self, tokens: list[Token]) -> None:
+        """Create a new parser for the given tokens
+
+        Tokens whose type is in `IGNORE` are dropped before parsing
+
+        Args:
+            tokens (list[Token]): the tokens to parse
+        """
+        self.tokens: list[Token] = [
+            token for token in tokens if token.type not in self.IGNORE
+        ]
+        self.current: int = 0
+        self.length: int = len(self.tokens)
+        self.errors: list[TokenError] = []
+
+    def error(self, token: Token, message: str):
+        """Record an error and build the exception to raise for it
+
+        The error is appended to `self.errors`; the returned exception is
+        not raised by this method, the caller decides whether to raise it
+
+        Args:
+            token (Token): the token at which the error was detected
+            message (str): a message explaining the error
+
+        Returns:
+            ParsingError: the parsing error to raise
+        """
+        report = TokenError(token=token, message=message)
+        self.errors.append(report)
+        return ParsingError()
+
+    @abstractmethod
+    def parse(self) -> T:
+        """Parse the tokens
+
+        Returns:
+            T: the parsed element(s)
+        """
+
+    def is_at_end(self) -> bool:
+        """Whether the parser has reached the EOF token
+
+        Returns:
+            bool: True if the current token is the EOF token
+        """
+        return self.peek().type == TokenType.EOF
+
+    def peek(self) -> Token:
+        """Get the current token without advancing
+
+        Returns:
+            Token: the current token
+        """
+        return self.tokens[self.current]
+
+    def previous(self) -> Token:
+        """Get the token just before the current one
+
+        This function is unsafe at the beginning of the token list: the
+        index is then -1, which wraps around to the last token of the list
+
+        Returns:
+            Token: the previous token
+        """
+        return self.tokens[self.current - 1]
+
+    def check(self, token_type: TokenType) -> bool:
+        """Check whether the current token is of the given type
+
+        This function always returns False if the parser is at the EOF token
+
+        Args:
+            token_type (TokenType): the type of token to check
+
+        Returns:
+            bool: True if the current token is of the given type and not EOF
+        """
+        return not self.is_at_end() and self.peek().type == token_type
+
+    def check_next(self, token_type: TokenType) -> bool:
+        """Check whether the token after the current one is of the given type
+
+        This function always returns False if the parser is at the EOF token,
+        or if the next token is missing or is the EOF token
+
+        Args:
+            token_type (TokenType): the type of token to check
+
+        Returns:
+            bool: True if the next token is of the given type and not EOF
+        """
+        upcoming: int = self.current + 1
+        if self.is_at_end() or upcoming >= self.length:
+            return False
+        next_type: TokenType = self.tokens[upcoming].type
+        return next_type != TokenType.EOF and next_type == token_type
+
+    def advance(self) -> Token:
+        """Consume and return the current token, if not at the EOF
+
+        At the EOF, nothing is consumed and the previous token is returned
+
+        Returns:
+            Token: the current token, before advancing
+        """
+        if self.is_at_end():
+            return self.previous()
+        self.current += 1
+        return self.previous()
+
+    def match(self, *types: TokenType) -> bool:
+        """Consume the current token if it matches one of the given types
+
+        Returns:
+            bool: whether a token was matched and consumed
+        """
+        if any(self.check(candidate) for candidate in types):
+            self.advance()
+            return True
+        return False
+
+    def consume(self, token_type: TokenType, error_msg: str) -> Token:
+        """Consume the current token if it matches the given type or raise an error
+
+        If the current token doesn't match the given type, the error is
+        recorded with the provided message and raised
+
+        Args:
+            token_type (TokenType): the expected token type
+            error_msg (str): the error message if the token doesn't match
+
+        Raises:
+            ParsingError: if the current token doesn't match the given type
+
+        Returns:
+            Token: the current token which matched the given type
+        """
+        if not self.check(token_type):
+            raise self.error(self.peek(), error_msg)
+        return self.advance()
--- a/parser/errors.py
+++ b/parser/errors.py
@@ -0,0 +1,2 @@
+class ParsingError(RuntimeError):
+    """Raised by the parsers to unwind from a syntax error"""
--- a/parser/midas.py
+++ b/parser/midas.py
@@ -0,0 +1,217 @@
+from typing import Optional
+
+from core.ast.midas import (
+    ConstraintExpr,
+    ConstraintStmt,
+    Expr,
+    LiteralExpr,
+    OpStmt,
+    PropertyStmt,
+    Stmt,
+    TypeBodyExpr,
+    TypeExpr,
+    TypeStmt,
+    WildcardExpr,
+)
+from lexer.token import Token, TokenType
+from parser.base import Parser
+from parser.errors import ParsingError
+
+
+class MidasParser(Parser):
+    """A simple parser for midas type definitions"""
+
+    SYNC_BOUNDARY: set[TokenType] = {TokenType.TYPE, TokenType.OP, TokenType.CONSTRAINT}
+
+    def parse(self) -> list[Stmt]:
+        """Parse declarations until EOF or until the first parsing error
+
+        Errors are not raised: they are recorded in `self.errors`, and
+        parsing stops at the first declaration that fails
+
+        Returns:
+            list[Stmt]: the statements parsed before EOF or the first error
+        """
+        statements: list[Stmt] = []
+        while not self.is_at_end():
+            stmt: Optional[Stmt] = self.declaration()
+            if stmt is None:
+                # The error is already recorded in `self.errors`; stop here
+                # without printing anything (library code must stay silent)
+                break
+            statements.append(stmt)
+        return statements
+
+    def synchronize(self):
+        """Skip tokens until a synchronization boundary is found
+
+        This method allows gracefully recovering from a parse error
+        to a safe place and continue parsing. At least one token is always
+        skipped, then tokens are skipped up to the next `type`, `op` or
+        `constraint` keyword (or EOF)
+        """
+        self.advance()
+        # NEWLINE tokens are part of `Parser.IGNORE` and thus never reach the
+        # token list, so keywords are the only usable boundaries
+        while not self.is_at_end():
+            if self.peek().type in self.SYNC_BOUNDARY:
+                return
+            self.advance()
+
+    def declaration(self) -> Optional[Stmt]:
+        """Try and parse a declaration
+
+        Any parsing error is caught, the parser is synchronized
+        and None is returned
+
+        Returns:
+            Optional[Stmt]: the parsed Midas statement, or None if a ParsingError was raised
+        """
+        try:
+            if self.match(TokenType.TYPE):
+                return self.type_declaration()
+            if self.match(TokenType.OP):
+                return self.op_declaration()
+            if self.match(TokenType.CONSTRAINT):
+                return self.constraint_declaration()
+            raise self.error(self.peek(), "Unexpected token")
+        except ParsingError:
+            self.synchronize()
+            return None
+
+    def type_declaration(self) -> TypeStmt:
+        """Parse a type declaration
+
+        A type declaration is written `type Name<TypeExpr, ...>` optionally followed by a brace-wrapped body.
+        The `type` keyword is expected to have just been consumed
+
+        Returns:
+            TypeStmt: the parsed type declaration statement
+
+        Raises:
+            ParsingError: if the declaration is malformed
+        """
+        name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
+        self.consume(TokenType.LESS, "Expected '<' after type name")
+        bases: list[TypeExpr] = []
+        while not self.check(TokenType.GREATER) and not self.is_at_end():
+            bases.append(self.type_expr())
+            if not self.check(TokenType.GREATER):
+                self.consume(TokenType.COMMA, "Expected ',' between type bases")
+        self.consume(TokenType.GREATER, "Expected '>' after base type")
+
+        body: Optional[TypeBodyExpr] = None
+
+        if self.check(TokenType.LEFT_BRACE):
+            body = self.type_body_expr()
+        return TypeStmt(name=name, bases=bases, body=body)
+
+    def type_expr(self) -> TypeExpr:
+        """Parse a type expression
+
+        A type expression is a type name followed by any number
+        of `+ (constraint)` suffixes
+
+        Returns:
+            TypeExpr: the parsed type expression
+
+        Raises:
+            ParsingError: if the type expression is malformed
+        """
+        name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
+        constraints: list[ConstraintExpr] = []
+
+        while not self.is_at_end() and self.match(TokenType.PLUS):
+            self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint")
+            constraints.append(self.constraint_expr())
+            self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint")
+
+        return TypeExpr(name=name, constraints=constraints)
+
+    def constraint_expr(self) -> ConstraintExpr:
+        """Parse a type constraint
+
+        A constraint is a value, a comparison operator, then another value
+
+        Returns:
+            ConstraintExpr: the parsed type constraint expression
+        """
+
+        left: Expr = self.constraint_value()
+        op: Token = self.constraint_operator()
+        right: Expr = self.constraint_value()
+        return ConstraintExpr(left=left, op=op, right=right)
+
+    def constraint_value(self) -> Expr:
+        """Parse one side of a constraint: either the wildcard `_` or a literal
+
+        Returns:
+            Expr: the parsed wildcard or literal expression
+        """
+        if self.match(TokenType.UNDERSCORE):
+            return WildcardExpr(self.previous())
+        return self.literal()
+
+    def literal(self) -> LiteralExpr:
+        """Parse a boolean, none or number literal
+
+        Returns:
+            LiteralExpr: the parsed literal expression
+
+        Raises:
+            ParsingError: if the current token is not a literal
+        """
+        if self.match(TokenType.FALSE):
+            return LiteralExpr(False)
+        if self.match(TokenType.TRUE):
+            return LiteralExpr(True)
+        if self.match(TokenType.NONE):
+            return LiteralExpr(None)
+
+        if self.match(TokenType.NUMBER):
+            return LiteralExpr(self.previous().value)
+
+        raise self.error(self.peek(), "Expected literal")
+
+    def constraint_operator(self) -> Token:
+        """Parse a comparison operator
+
+        Returns:
+            Token: the operator token
+
+        Raises:
+            ParsingError: if the current token is not a comparison operator
+        """
+        if self.match(
+            TokenType.LESS,
+            TokenType.LESS_EQUAL,
+            TokenType.GREATER,
+            TokenType.GREATER_EQUAL,
+            TokenType.EQUAL_EQUAL,
+            TokenType.BANG_EQUAL,
+        ):
+            return self.previous()
+        raise self.error(self.peek(), "Expected constraint operator")
+
+    def type_body_expr(self) -> TypeBodyExpr:
+        """Parse a type definition body
+
+        A type definition body is a set of whitespace-separated
+        property statements enclosed in curly braces
+
+        Returns:
+            TypeBodyExpr: the parsed type body expression
+
+        Raises:
+            ParsingError: if a property is malformed or the body is not closed
+        """
+        self.consume(TokenType.LEFT_BRACE, "Expected '{' to start type body")
+        properties: list[PropertyStmt] = []
+        while not self.check(TokenType.RIGHT_BRACE) and not self.is_at_end():
+            properties.append(self.property_stmt())
+        self.consume(TokenType.RIGHT_BRACE, "Unclosed type body")
+        return TypeBodyExpr(properties=properties)
+
+    def property_stmt(self) -> PropertyStmt:
+        """Parse a property statement
+
+        A type property statement is written `name: Type`
+
+        Returns:
+            PropertyStmt: the parsed property statement
+        """
+        name: Token = self.consume(TokenType.IDENTIFIER, "Expected property name")
+        self.consume(TokenType.COLON, "Expected ':' after property name")
+        type: TypeExpr = self.type_expr()
+        return PropertyStmt(name=name, type=type)
+
+    def op_declaration(self) -> OpStmt:
+        """Parse an operation definition
+
+        An operation is written `op <Type1> operator <Type2> = <Type3>` where `operator` can be any single token.
+        The `op` keyword is expected to have just been consumed
+
+        Returns:
+            OpStmt: the parsed operation statement
+        """
+        self.consume(TokenType.LESS, "Expected '<' before first type")
+        left: TypeExpr = self.type_expr()
+        self.consume(TokenType.GREATER, "Expected '>' after first type")
+
+        # The operator is not validated: whatever token comes next is used
+        op: Token = self.advance()
+
+        self.consume(TokenType.LESS, "Expected '<' before second type")
+        right: TypeExpr = self.type_expr()
+        self.consume(TokenType.GREATER, "Expected '>' after second type")
+
+        self.consume(TokenType.EQUAL, "Expected '=' after second type")
+
+        self.consume(TokenType.LESS, "Expected '<' before result type")
+        result: TypeExpr = self.type_expr()
+        self.consume(TokenType.GREATER, "Expected '>' after result type")
+
+        return OpStmt(left=left, op=op, right=right, result=result)
+
+    def constraint_declaration(self) -> ConstraintStmt:
+        """Parse a type constraint declaration
+
+        A constraint is written `constraint Name = constraint_expression`.
+        The `constraint` keyword is expected to have just been consumed
+
+        Returns:
+            ConstraintStmt: the parsed constraint declaration statement
+        """
+        name: Token = self.consume(TokenType.IDENTIFIER, "Expected constraint name")
+        self.consume(TokenType.EQUAL, "Expected '=' after constraint name")
+        constraint: ConstraintExpr = self.constraint_expr()
+        return ConstraintStmt(name=name, constraint=constraint)
--- a/syntax/annotations.ebnf
+++ b/syntax/annotations.ebnf
@@ -0,0 +1,20 @@
+identifier ::= '[a-zA-Z][a-zA-Z0-9_]*'
+
+integer ::= '\d+'
+number ::= integer ["." integer]
+boolean ::= "False" | "True"
+none ::= "None"
+
+value ::= number | boolean | none
+lambda-value ::= "_" | value
+lambda-operator ::= ">" | "<" | ">=" | "<=" | "==" | "!="
+lambda ::= lambda-value lambda-operator lambda-value
+
+constraint ::= identifier | "(" lambda ")"
+base-type ::= identifier
+type ::= base-type { "+" constraint }
+
+column-type ::= type | "_"
+column-def ::= [ identifier ":" ] column-type
+
+frame-def ::= column-def { "," column-def }
--- a/syntax/annotations.typ
+++ b/syntax/annotations.typ
@@ -0,0 +1,74 @@
+#import "@preview/fervojo:0.1.1": render
+
+#let value = ```
+{[`value` <
+  [`number` 'digit' * ! <!, ["." 'digit' * !]>],
+  [`boolean` <"False", "True">],
+  [`none` "None"]
+>]}
+```
+
+#let constraint = ```
+{[`constraint` <"_", 'value'> <">", "<", ">=", "<=", "==", "!="> <"_", 'value'>]}
+```
+
+#let type-with-constraints = ```
+{[`type-with-constraints` 'identifier' <!, ["+" "(" 'constraint' ")"] * !>]}
+```
+
+#let column-def = ```
+{[`column-def` <!, ['identifier' ":"]> <"_", 'type-with-constraints'>]}
+```
+
+#let frame-def = ```
+{[`frame-def` 'column-def' * ","]}
+```
+
+#let annotation = ```
+{[`annotation` 'identifier' <!, ["[" 'frame-def' "]"]>]}
+```
+
+// Grammar rules, in the order they are rendered in the document
+#let rules = (
+  value,
+  constraint,
+  type-with-constraints,
+  column-def,
+  frame-def,
+  annotation,
+)
+
+#set text(font: "Source Sans 3")
+
+= Type annotation syntax
+
+// Render each rule with the imported `render` function
+#for rule in rules {
+  render(rule)
+}
+
+/*
+#let by-name = (
+  annotation: annotation,
+  frame-def: frame-def,
+  column-def: column-def,
+  type-with-constraints: type-with-constraints,
+  constraint: constraint,
+  value: value,
+)
+
+#let substitute(base-rule) = {
+  let new-rule = base-rule
+  for (key, rule) in by-name.pairs() {
+    new-rule = new-rule.replace("'" + key + "'", rule.text.slice(1, -1))
+  }
+  if new-rule != base-rule {
+    new-rule = substitute(new-rule)
+  }
+  return new-rule
+}
+
+#let combined = raw(substitute(annotation.text))
+
+
+#set page(flipped: true)
+#render(combined)
+*/
--- a/syntax/midas.ebnf
+++ b/syntax/midas.ebnf
@@ -0,0 +1,26 @@
+identifier ::= '[a-zA-Z][a-zA-Z0-9_]*'
+
+integer ::= '\d+'
+number ::= integer ["." integer]
+boolean ::= "false" | "true"
+none ::= "none"
+
+value ::= number | boolean | none
+lambda-value ::= "_" | value
+lambda-operator ::= ">" | "<" | ">=" | "<=" | "==" | "!="
+lambda ::= lambda-value lambda-operator lambda-value
+
+constraint ::= identifier | "(" lambda ")"
+base-type ::= identifier
+type ::= base-type { "+" constraint }
+
+type-property ::= 'identifier' ":" 'type'
+type-body ::= "{" { 'type-property' } "}"
+
+operation-type ::= "<" 'type' ">"
+
+type-statement ::= "type" 'identifier' "<" 'type' {"," 'type'} ">" ['type-body']
+operation-statement ::= "op" 'operation-type' 'operator' 'operation-type' "=" 'operation-type'
+constraint-statement ::= "constraint" 'identifier' "=" 'lambda'
+
+statement ::= type-statement | operation-statement | constraint-statement
--- a/syntax/midas.typ
+++ b/syntax/midas.typ
@@ -0,0 +1,97 @@
+#import "@preview/fervojo:0.1.1": render
+
+#let value = ```
+{[`value` <
+  [`number` 'digit' * ! <!, ["." 'digit' * !]>],
+  [`boolean` <"false", "true">],
+  [`none` "none"]
+>]}
+```
+
+#let constraint = ```
+{[`constraint` <"_", 'value'> <">", "<", ">=", "<=", "==", "!="> <"_", 'value'>]}
+```
+
+#let type-with-constraints = ```
+{[`type-with-constraints` 'identifier' <!, ["+" "(" 'constraint' ")"] * !>]}
+```
+
+#let type-property = ```
+{[`type-property` 'identifier' ":" 'type-with-constraints']}
+```
+
+#let type-body = ```
+{[`type-body` "{" <!, 'type-property'*!> "}"]}
+```
+
+#let operation-type = ```
+{[`operation-type` "<" 'type-with-constraints' ">"]}
+```
+
+#let type-statement = ```
+{[`type-statement` "type" 'identifier' "<" 'type-with-constraints'*"," ">" <!, 'type-body'>]}
+```
+
+#let operation-statement = ```
+{[`operation-statement` "op" 'operation-type' "operator" 'operation-type' "=" 'operation-type']}
+```
+
+#let constraint-statement = ```
+{[`constraint-statement` "constraint" 'identifier' "=" 'constraint']}
+```
+
+#let statement = ```
+{[`statement` <'type-statement', 'operation-statement', 'constraint-statement'>]}
+```
+
+// Grammar rules, in the order they are rendered in the document
+#let rules = (
+  value,
+  constraint,
+  type-with-constraints,
+  type-property,
+  type-body,
+  operation-type,
+  type-statement,
+  operation-statement,
+  constraint-statement,
+  statement,
+)
+
+#set text(font: "Source Sans 3")
+
+= Midas type definition syntax
+
+// Render each rule with the imported `render` function
+#for rule in rules {
+  render(rule)
+}
+
+/*
+#let by-name = (
+  value: value,
+  constraint: constraint,
+  type-with-constraints: type-with-constraints,
+  type-property: type-property,
+  type-body: type-body,
+  operation-type: operation-type,
+  type-statement: type-statement,
+  operation-statement: operation-statement,
+  constraint-statement: constraint-statement,
+)
+
+#let substitute(base-rule) = {
+  let new-rule = base-rule
+  for (key, rule) in by-name.pairs() {
+    new-rule = new-rule.replace("'" + key + "'", rule.text.slice(1, -1))
+  }
+  if new-rule != base-rule {
+    new-rule = substitute(new-rule)
+  }
+  return new-rule.replace(regex("`.*?`"), "")
+}
+
+#let combined = raw(substitute(statement.text))
+
+
+#set page(flipped: true)
+#render(combined)
+*/
--- a/test.py
+++ b/test.py
@@ -0,0 +1,52 @@
+import importlib
+from pathlib import Path
+
+from core.ast.printer import AnnotationAstPrinter, MidasAstPrinter
+from lexer.annotations import AnnotationLexer
+from lexer.midas import MidasLexer
+from lexer.token import Token
+from parser.annotations import AnnotationParser
+from parser.midas import MidasParser
+
+
+def test_annotation():
+    """Lex and parse the `df` annotation of the example module, then print the results"""
+    # Frame annotation
+    example = importlib.import_module("examples.00_syntax_prototype.01_simple_types")
+    source: str = example.__annotations__["df"]
+
+    tokens: list[Token] = AnnotationLexer(source, "01_simple_types.py").process()
+    # print([f"{t.type.name}('{t.lexeme}')" for t in tokens])
+
+    parser = AnnotationParser(tokens)
+    tree = parser.parse()
+    print(tree)
+    for problem in parser.errors:
+        print(problem.get_report())
+
+    printer = AnnotationAstPrinter()
+    if tree is None:
+        return
+    print(printer.print(tree))
+
+
+def test_midas():
+    """Lex and parse the example Midas definitions, then print the results
+
+    Prints the parsed statements, every recorded parsing error,
+    and finally the printer output of each statement
+    """
+    # Midas type definitions
+    path: Path = Path("examples") / "00_syntax_prototype" / "02_custom_types.midas"
+    # Explicit encoding: the default one depends on the platform locale
+    definitions: str = path.read_text(encoding="utf-8")
+    midas_lexer: MidasLexer = MidasLexer(definitions, path.name)
+    tokens: list[Token] = midas_lexer.process()
+    # print([f"{t.type.name}('{t.lexeme}')" for t in tokens])
+
+    parser = MidasParser(tokens)
+    parsed = parser.parse()
+    print(parsed)
+    for err in parser.errors:
+        print(err.get_report())
+    printer = MidasAstPrinter()
+    # `MidasParser.parse` only ever appends parsed statements, never None
+    for stmt in parsed:
+        print(printer.print(stmt))
+
+
+test_midas()
--- a/tests/lexer/test_annotation_lexer.py
+++ b/tests/lexer/test_annotation_lexer.py
@@ -0,0 +1,129 @@
+from typing import Any
+
+import pytest
+
+from lexer.annotations import AnnotationLexer
+from lexer.token import Token, TokenType
+
+
+def scan(source: str) -> list[Token]:
+    return AnnotationLexer(source).process()
+
+
+def assert_n_tokens(tokens: list[Token], n: int):
+    assert len(tokens) == n + 1
+    assert tokens[-1].type == TokenType.EOF
+
+
+@pytest.mark.parametrize(
+    "src,expected",
+    [
+        ("(", TokenType.LEFT_PAREN),
+        (")", TokenType.RIGHT_PAREN),
+        ("[", TokenType.LEFT_BRACKET),
+        ("]", TokenType.RIGHT_BRACKET),
+        (":", TokenType.COLON),
+        (",", TokenType.COMMA),
+        ("_", TokenType.UNDERSCORE),
+    ],
+)
+def test_punctuation(src: str, expected: TokenType):
+    tokens: list[Token] = scan(src)
+    assert_n_tokens(tokens, 1)
+    assert tokens[0].type == expected
+
+
+@pytest.mark.parametrize(
+    "src,expected",
+    [
+        ("+", TokenType.PLUS),
+        (">", TokenType.GREATER),
+        (">=", TokenType.GREATER_EQUAL),
+        ("<", TokenType.LESS),
+        ("<=", TokenType.LESS_EQUAL),
+        ("=", TokenType.EQUAL),
+        ("==", TokenType.EQUAL_EQUAL),
+        ("!=", TokenType.BANG_EQUAL),
+    ],
+)
+def test_operators(src: str, expected: TokenType):
+    tokens: list[Token] = scan(src)
+    assert_n_tokens(tokens, 1)
+    assert tokens[0].type == expected
+
+
+@pytest.mark.parametrize(
+    "src,expected",
+    [
+        ("a", TokenType.IDENTIFIER),
+        ("foo", TokenType.IDENTIFIER),
+        ("foo1", TokenType.IDENTIFIER),
+        ("foo_", TokenType.IDENTIFIER),
+        ("foo_bar1_baz2", TokenType.IDENTIFIER),
+        ("FOO_BAR1_BAZ2", TokenType.IDENTIFIER),
+        ("True", TokenType.TRUE),
+        ("False", TokenType.FALSE),
+        ("None", TokenType.NONE),
+    ],
+)
+def test_identifiers_keywords(src: str, expected: TokenType):
+    tokens: list[Token] = scan(src)
+    assert_n_tokens(tokens, 1)
+    assert tokens[0].type == expected
+
+
+@pytest.mark.parametrize(
+    "src,expected",
+    [
+        ("#", TokenType.COMMENT),
+        ("# This is a comment", TokenType.COMMENT),
+        (" ", TokenType.WHITESPACE),
+        ("\t", TokenType.WHITESPACE),
+        ("\r", TokenType.WHITESPACE),
+        ("  \t  \t", TokenType.WHITESPACE),
+        ("\n", TokenType.NEWLINE),
+    ],
+)
+def test_misc(src: str, expected: TokenType):
+    tokens: list[Token] = scan(src)
+    assert_n_tokens(tokens, 1)
+    assert tokens[0].type == expected
+
+
+@pytest.mark.parametrize(
+    "src,expected_type,expected_value",
+    [
+        ("0", TokenType.NUMBER, 0),
+        ("0.0", TokenType.NUMBER, 0),
+        ("1234.56", TokenType.NUMBER, 1234.56),
+    ],
+)
+def test_literals(src: str, expected_type: TokenType, expected_value: Any):
+    tokens: list[Token] = scan(src)
+    assert_n_tokens(tokens, 1)
+    assert tokens[0].type == expected_type
+    assert tokens[0].value == expected_value
+
+
+def test_single_bang_error():
+    with pytest.raises(SyntaxError):
+        scan("!")
+
+
+@pytest.mark.parametrize(
+    "src",
+    [
+        "-",
+        "*",
+        "/",
+        "{",
+        "}",
+        "@",
+        '"',
+        "'",
+        ".",
+    ],
+)
+def test_unexpected_character(src: str):
+    with pytest.raises(SyntaxError):
+        scan(src)
--- a/tests/parser/test_annotation_parser.py
+++ b/tests/parser/test_annotation_parser.py
@@ -0,0 +1,130 @@
+from typing import Optional
+
+import pytest
+
+from core.ast.annotations import (
+    AnnotationStmt,
+    ConstraintExpr,
+    Expr,
+    LiteralExpr,
+    SchemaElementExpr,
+    SchemaExpr,
+    Stmt,
+    TypeExpr,
+    WildcardExpr,
+)
+from lexer.annotations import AnnotationLexer
+from lexer.position import Position
+from lexer.token import Token
+from parser.annotations import AnnotationParser
+
+
+class AstSerializer(Stmt.Visitor[str], Expr.Visitor[str]):
+    """Render an annotation AST as a compact S-expression string for test comparisons."""
+
+    def serialize(self, stmt: Stmt) -> str:
+        return stmt.accept(self)
+
+    def visit_annotation_stmt(self, stmt: AnnotationStmt) -> str:
+        schema: str = "" if stmt.schema is None else " " + stmt.schema.accept(self)
+        return f"(annotation {stmt.name.lexeme}{schema})"
+
+    def visit_schema_expr(self, expr: SchemaExpr) -> str:
+        elements: list[str] = [elmt.accept(self) for elmt in expr.elements]
+        return f"(schema {' '.join(elements)})"
+
+    def visit_schema_element_expr(self, expr: SchemaElementExpr) -> str:
+        # A missing name or type is rendered as "_".
+        name_str: str = expr.name.lexeme if expr.name is not None else "_"
+        type_str: str = expr.type.accept(self) if expr.type is not None else "_"
+        return f"({name_str} {type_str})"
+
+    def visit_type_expr(self, expr: TypeExpr) -> str:
+        # Constraints follow the type name, separated by single spaces.
+        parts: list[str] = [expr.name.lexeme]
+        parts.extend(constraint.accept(self) for constraint in expr.constraints)
+        return f"({' '.join(parts)})"
+
+    def visit_constraint_expr(self, expr: ConstraintExpr) -> str:
+        return f"(constraint {expr.left.accept(self)} {expr.op.lexeme} {expr.right.accept(self)})"
+
+    def visit_wildcard_expr(self, expr: WildcardExpr) -> str:
+        return "(_)"
+
+    def visit_literal_expr(self, expr: LiteralExpr) -> str:
+        return f"({expr.value})"
+
+
+def parse(source: str) -> Optional[Stmt]:
+    tokens: list[Token] = AnnotationLexer(source).process()
+    return AnnotationParser(tokens).parse()
+
+
+def must_parse(source: str) -> Stmt:
+    stmt: Optional[Stmt] = parse(source)
+    assert stmt is not None
+    return stmt
+
+
+def ast_str(source: str) -> str:
+    stmt: Stmt = must_parse(source)
+    return AstSerializer().serialize(stmt)
+
+
+@pytest.mark.parametrize(
+    "src,expected",
+    [
+        ("Type", "(annotation Type)"),
+        ("Type[]", "(annotation Type (schema ))"),
+        (
+            """
+            Frame[
+                verified: bool,
+                birth_year: int,
+                height: float + ( _ > 0 ) + ( _ < 250 ),
+                name: str,
+                date: datetime,
+                float,  # unnamed
+                unknown: _,  # untyped
+                _  # unnamed and untyped
+            ]
+            """,
+            "(annotation Frame (schema (verified (bool)) (birth_year (int)) (height (float (constraint (_) > (0.0)) (constraint (_) < (250.0)))) (name (str)) (date (datetime)) (_ (float)) (unknown _) (_ _)))",
+        ),
+    ],
+)
+def test_expressions(src: str, expected: str):
+    assert ast_str(src) == expected
+
+
+@pytest.mark.parametrize(
+    "src,pos,should_fail",
+    [
+        ("", (1, 1), True),
+        ("42", (1, 1), True),
+        ("True", (1, 1), True),
+        ("Type[", (1, 6), True),
+        ("Type[] Type2", (1, 8), False),
+        ("Type[bool:]", (1, 11), True),
+        ("Type[3]", (1, 6), True),
+        ("Type[bool float]", (1, 11), True),
+        ("Type[bool (_ < 2)]", (1, 11), True),
+        ("Type[bool + _ < 2)]", (1, 13), True),
+        ("Type[bool + (_ < 2]", (1, 19), True),
+        ("Type[bool + (< 2)]", (1, 14), True),
+        ("Type[bool + (_ + 2)]", (1, 16), True),
+        ("Type[bool + (Foo + Bar)]", (1, 14), True),
+        # ("Type[bool,]", (1, 11), True),  # trailing comma is accepted, TODO: update parser or EBNF
+        ("Type[bool, Type[]]", (1, 16), True),
+        ("Type[foo: 3]", (1, 11), True),
+    ],
+)
+def test_parsing_error(src: str, pos: tuple[int, int], should_fail: bool):
+    tokens: list[Token] = AnnotationLexer(src).process()
+    parser: AnnotationParser = AnnotationParser(tokens)
+    stmt: Optional[Stmt] = parser.parse()
+    if should_fail:
+        assert stmt is None
+    assert len(parser.errors) != 0
+    error_pos: Position = parser.errors[0].token.position
+    assert (error_pos.line, error_pos.column) == pos
--- a/tests/parser/test_midas_parser.py
+++ b/tests/parser/test_midas_parser.py
@@ -0,0 +1,202 @@
+import textwrap
+
+import pytest
+
+from core.ast.midas import (
+    ConstraintExpr,
+    ConstraintStmt,
+    Expr,
+    LiteralExpr,
+    OpStmt,
+    PropertyStmt,
+    Stmt,
+    TypeBodyExpr,
+    TypeExpr,
+    TypeStmt,
+    WildcardExpr,
+)
+from lexer.midas import MidasLexer
+from lexer.position import Position
+from lexer.token import Token
+from parser.midas import MidasParser
+
+
+class AstSerializer(Stmt.Visitor[str], Expr.Visitor[str]):
+    def serialize(self, stmt: Stmt):
+        return stmt.accept(self)
+
+    def visit_type_stmt(self, stmt: TypeStmt) -> str:
+        res: str = f"(type_def {stmt.name.lexeme}"
+        for base in stmt.bases:
+            res += " " + base.accept(self)
+        if stmt.body is not None:
+            res += " " + stmt.body.accept(self)
+        res += ")"
+        return res
+
+    def visit_type_expr(self, expr: TypeExpr) -> str:
+        res: str = f"({expr.name.lexeme}"
+        for constraint in expr.constraints:
+            res += " " + constraint.accept(self)
+        res += ")"
+        return res
+
+    def visit_constraint_expr(self, expr: ConstraintExpr) -> str:
+        return f"(constraint {expr.left.accept(self)} {expr.op.lexeme} {expr.right.accept(self)})"
+
+    def visit_wildcard_expr(self, expr: WildcardExpr) -> str:
+        return "(_)"
+
+    def visit_literal_expr(self, expr: LiteralExpr) -> str:
+        return f"({expr.value})"
+
+    def visit_type_body_expr(self, expr: TypeBodyExpr) -> str:
+        res: str = "(body"
+        for prop in expr.properties:
+            res += " " + prop.accept(self)
+        res += ")"
+        return res
+
+    def visit_property_stmt(self, stmt: PropertyStmt) -> str:
+        return f"(property {stmt.name.lexeme} {stmt.type.accept(self)})"
+
+    def visit_op_stmt(self, stmt: OpStmt) -> str:
+        left: str = stmt.left.accept(self)
+        right: str = stmt.right.accept(self)
+        result: str = stmt.result.accept(self)
+        return f"(op_def {left} {stmt.op.lexeme} {right} {result})"
+
+    def visit_constraint_stmt(self, stmt: ConstraintStmt) -> str:
+        return f"(constraint_def {stmt.name.lexeme} {stmt.constraint.accept(self)})"
+
+
+def parse(source: str) -> list[Stmt]:
+    tokens: list[Token] = MidasLexer(source).process()
+    return MidasParser(tokens).parse()
+
+
+def ast_str(source: str) -> list[str]:
+    stmts: list[Stmt] = parse(source)
+    return [AstSerializer().serialize(stmt) for stmt in stmts]
+
+
+@pytest.mark.parametrize(
+    "src,expected",
+    [
+        ("type Foo<>", "(type_def Foo)"),
+        ("type Foo<Bar>", "(type_def Foo (Bar))"),
+        ("type Foo<Bar, Baz>", "(type_def Foo (Bar) (Baz))"),
+        (
+            "type Foo<Bar + (_ < 2), Baz>",
+            "(type_def Foo (Bar (constraint (_) < (2.0))) (Baz))",
+        ),
+        (
+            """
+            type Foo<> {
+                foo: Bar
+            }
+            """,
+            "(type_def Foo (body (property foo (Bar))))",
+        ),
+        (
+            """
+            type Foo<> {
+                foo: Bar + (_ != none)
+                foo2: Bar2 + (0 <= _) + (_ <= 100)
+            }
+            """,
+            "(type_def Foo (body (property foo (Bar (constraint (_) != (None)))) (property foo2 (Bar2 (constraint (0.0) <= (_)) (constraint (_) <= (100.0))))))",
+        ),
+        ("op <A> + <B> = <C>", "(op_def (A) + (B) (C))"),
+        (
+            "op <A + (_ < 100)> + <B + (_ < 100)> = <C + (_ < 200)>",
+            "(op_def (A (constraint (_) < (100.0))) + (B (constraint (_) < (100.0))) (C (constraint (_) < (200.0))))",
+        ),
+        (
+            "constraint Positive = _ >= 0",
+            "(constraint_def Positive (constraint (_) >= (0.0)))",
+        ),
+    ],
+)
+def test_expressions(src: str, expected: str | list[str]):
+    if isinstance(expected, str):
+        expected = [expected]
+    assert ast_str(src) == expected
+
+
+@pytest.mark.parametrize(
+    "src,pos",
+    [
+        ###
+        # Misc
+        ###
+        ("42", (1, 1)),
+        ("true", (1, 1)),
+        ("foo", (1, 1)),
+        ###
+        # Type statements
+        ###
+        ("type", (1, 5)),
+        ("type true", (1, 6)),
+        ("type Foo", (1, 9)),
+        ("type Foo<1>", (1, 10)),
+        # ("type Foo<float,>", (1, 16)),  # trailing comma is accepted, TODO: update parser or EBNF
+        ("type Foo<float, 1>", (1, 17)),
+        ("type Foo<float", (1, 15)),
+        ("type Foo<float> { 3 }", (1, 19)),
+        (
+            """
+            type Foo<float> {
+                foo
+            }
+            """,
+            (4, 1),
+        ),
+        (
+            """
+            type Foo<float> {
+                foo: 3
+            }
+            """,
+            (3, 10),
+        ),
+        ###
+        # Operation statements
+        ###
+        ("op", (1, 3)),
+        ("op float", (1, 4)),
+        ("op <", (1, 5)),
+        ("op <float", (1, 10)),
+        ("op <float>", (1, 11)),
+        ("op <float> +", (1, 13)),
+        ("op <float> + float", (1, 14)),
+        ("op <float> + <", (1, 15)),
+        ("op <float> + <float", (1, 20)),
+        ("op <float> + <float>", (1, 21)),
+        ("op <float> + <float> =", (1, 23)),
+        ("op <float> + <float> = float", (1, 24)),
+        ("op <float> + <float> = <", (1, 25)),
+        ("op <float> + <float> = <float", (1, 30)),
+        ("op <float + 3> + <float> = <float>", (1, 13)),
+        ("op <float> + <float + 3> = <float>", (1, 23)),
+        ("op <float> + <float> = <float + 3>", (1, 33)),
+        ###
+        # Constraint statements
+        ###
+        ("constraint", (1, 11)),
+        ("constraint 3", (1, 12)),
+        ("constraint Foo", (1, 15)),
+        ("constraint Foo =", (1, 17)),
+        ("constraint Foo = 3", (1, 19)),
+        ("constraint Foo = 3 <", (1, 21)),
+    ],
+)
+def test_parsing_error(src: str, pos: tuple[int, int]):
+    src = textwrap.dedent(src)
+    tokens: list[Token] = MidasLexer(src).process()
+    parser: MidasParser = MidasParser(tokens)
+    stmt: list[Stmt] = parser.parse()
+    assert len(stmt) == 0
+    assert len(parser.errors) != 0
+    error_pos: Position = parser.errors[0].token.position
+    assert (error_pos.line, error_pos.column) == pos
--- a/vscode-ext/language-configurations.json
+++ b/vscode-ext/language-configurations.json
@@ -0,0 +1,20 @@
+{
+    "brackets": [
+        ["{", "}"],
+        ["[", "]"],
+        ["(", ")"],
+        ["<", ">"]
+    ],
+    "autoClosingPairs": [
+        { "open": "{", "close": "}" },
+        { "open": "[", "close": "]" },
+        { "open": "(", "close": ")" },
+        { "open": "<", "close": ">" }
+    ],
+    "surroundingPairs": [
+        ["{", "}"],
+        ["[", "]"],
+        ["(", ")"],
+        ["<", ">"]
+    ]
+}
--- a/vscode-ext/package.json
+++ b/vscode-ext/package.json
@@ -0,0 +1,33 @@
+{
+    "name": "midas",
+    "version": "0.1.0",
+    "engines": {
+        "vscode": "*"
+    },
+    "categories": ["Programming Languages"],
+    "contributes": {
+        "languages": [
+            {
+                "id": "midas",
+                "extensions": [
+                    ".mpy",
+                    ".midas"
+                ],
+                "aliases": [
+                    "Midas"
+                ],
+                "configuration": "./language-configurations.json"
+            }
+        ],
+        "grammars": [
+            {
+                "language": "midas",
+                "scopeName": "source.midas",
+                "path": "./syntaxes/midas.tmLanguage.json",
+                "embeddedLanguages": {
+                    "meta.embedded.block.python": "python"
+                }
+            }
+        ]
+    }
+}
--- a/vscode-ext/syntaxes/midas.tmLanguage.json
+++ b/vscode-ext/syntaxes/midas.tmLanguage.json
@@ -0,0 +1,135 @@
+{
+    "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json",
+    "name": "Midas",
+    "scopeName": "source.midas",
+    "patterns": [{ "include": "#statement" }],
+    "repository": {
+        "comment": {
+            "begin": "(//)",
+            "end": "($)",
+            "name": "comment.line.double-slash.midas",
+            "beginCaptures": {
+                "1": {
+                    "name": "punctuation.definition.comment.midas"
+                }
+            }
+        },
+        "type-def": {
+            "begin": "\\b(type)\\s+([a-zA-Z_][a-zA-Z_\\d]*)",
+            "end": "$",
+            "beginCaptures": {
+                "1": {
+                    "name": "keyword.control.type.midas"
+                },
+                "2": {
+                    "name" : "variable.name"
+                }
+            },
+            "patterns": [
+                { "include": "#type-base" },
+                { "include": "#type-body" }
+            ]
+        },
+        "type-base": {
+            "begin": "<",
+            "end": ">",
+            "beginCaptures": {
+                "0": {
+                    "name": "punctuation.definition.base.begin.midas"
+                }
+            },
+            "endCaptures": {
+                "0": {
+                    "name": "punctuation.definition.base.end.midas"
+                }
+            },
+            "patterns": [
+                {"include": "source.python"}
+            ]
+        },
+        "type-body": {
+            "begin": "\\{",
+            "end": "\\}",
+            "beginCaptures": {
+                "0": {
+                    "name": "punctuation.definition.type-body.begin.midas"
+                }
+            },
+            "endCaptures": {
+                "0": {
+                    "name": "punctuation.definition.type-body.end.midas"
+                }
+            },
+            "patterns": [
+                {"include": "#type-prop"}
+            ]
+        },
+        "type-prop": {
+            "match": "([a-zA-Z_][a-zA-Z_\\d]*)(:)\\s*([a-zA-Z_][a-zA-Z_\\d]*)",
+            "captures": {
+                "1": {
+                    "name": "variable.name"
+                },
+                "2": {
+                    "name": "punctuation.separator.annotation.midas"
+                },
+                "3": {
+                    "name": "meta.type.name"
+                }
+            }
+        },
+        "op-def": {
+            "match": "\\b(op)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>\\s+(\\S+)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>\\s+(=)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>",
+            "captures": {
+                "1": {
+                    "name": "keyword.control.op.midas"
+                },
+                "2": {
+                    "name" : "variable.name"
+                },
+                "3": {
+                    "name" : "keyword.operator"
+                },
+                "4": {
+                    "name" : "variable.name"
+                },
+                "5": {
+                    "name" : "keyword.operator.assignment"
+                },
+                "6": {
+                    "name" : "variable.name"
+                }
+            },
+            "patterns": [
+                { "include": "#type-base" },
+                { "include": "#type-body" }
+            ]
+        },
+        "constr-def": {
+            "begin": "(constraint)\\s+([a-zA-Z_][a-zA-Z_\\d]*)\\s*(=)",
+            "end": "$",
+            "beginCaptures": {
+                "1": {
+                    "name": "keyword.control.constr.midas"
+                },
+                "2": {
+                    "name": "variable.name"
+                },
+                "3": {
+                    "name": "keyword.operator.assignment"
+                }
+            },
+            "patterns": [
+                { "include": "source.python" }
+            ]
+        },
+        "statement": {
+            "patterns": [
+                { "include": "#comment" },
+                { "include": "#type-def" },
+                { "include": "#op-def" },
+                { "include": "#constr-def" }
+            ]
+        }
+    }
+}
Author	SHA1	Message	Date
LordBaryhobal	7a831a1afc	fix(parser): handle extra tokens in Midas parser	2026-05-18 18:43:35 +02:00
LordBaryhobal	6922e49cdf	tests(parser): add tests for midas parser	2026-05-18 18:43:03 +02:00
LordBaryhobal	ae02bab030	tests(parser): add tests for annotation parser	2026-05-18 14:42:27 +02:00
LordBaryhobal	7581a35be4	tests(parser): add syntax error test	2026-05-18 13:43:12 +02:00
LordBaryhobal	6f0c0ce326	tests(parser): add literal value test	2026-05-18 13:35:23 +02:00
LordBaryhobal	1b1fbb834e	fix(parser): fix bang equal consume equal token when matching bang-equal	2026-05-18 13:22:11 +02:00
LordBaryhobal	f41c757838	tests(parser): complete simple annotation lexer tests	2026-05-18 13:21:23 +02:00
LordBaryhobal	453c72af6d	tests(parser): add basic lexer test add a basic test for the annotation lexer to check punctuation tokens	2026-05-18 13:11:00 +02:00
LordBaryhobal	ee308fe223	fix(parser): update examples	2026-05-18 12:47:21 +02:00
LordBaryhobal	3f199ff134	feat(parser): add Midas pretty printer	2026-05-18 12:43:05 +02:00
LordBaryhobal	74ac9c5381	feat(parser): parse Midas type constraints	2026-05-18 12:18:41 +02:00
LordBaryhobal	e10d71a66b	feat(parser): split annotation and Midas keywords	2026-05-18 11:33:33 +02:00
LordBaryhobal	8bc0918517	feat(parser): parse annotation type constraints	2026-05-18 11:27:52 +02:00
LordBaryhobal	be50a8db35	feat(parser): add operators to annotations lexer	2026-05-18 11:02:48 +02:00
LordBaryhobal	340bcc65fd	feat(parser): update annotation printers	2026-05-18 11:02:06 +02:00
LordBaryhobal	cbf0f2852e	feat(parser): add AnnotationStmt and ConstraintExpr	2026-05-18 11:01:39 +02:00
LordBaryhobal	64d96bd94e	feat: add Midas railroad diagrams	2026-05-18 10:23:53 +02:00
LordBaryhobal	a3ba0ef35d	feat: add Midas EBNF	2026-05-18 10:23:45 +02:00
LordBaryhobal	903179832e	feat: add annotations railroad diagrams	2026-05-18 10:02:10 +02:00
LordBaryhobal	4fe495620b	feat: add annotations EBNF	2026-05-18 09:51:17 +02:00
LordBaryhobal	539084f6d8	feat(parser): add documentation to Midas parser	2026-05-18 09:17:57 +02:00
LordBaryhobal	5831906f26	feat(parser): add documentation to annotation parser	2026-05-18 08:54:23 +02:00
LordBaryhobal	61b36ee50f	feat(parser): parse constraint statements	2026-05-14 02:44:21 +02:00
LordBaryhobal	4b715ed33a	feat(parser): use midas parser in test script	2026-05-14 02:38:04 +02:00
LordBaryhobal	0af31a6f85	feat(parser): parse op statements	2026-05-14 02:37:50 +02:00
LordBaryhobal	3b40abaa2b	feat(parser): parse type body	2026-05-14 02:29:08 +02:00
LordBaryhobal	6482e06bca	feat(parser): add base Midas parser (incomplete)	2026-05-14 02:19:50 +02:00
LordBaryhobal	4d25b43a4e	fix(parser): prepare printer for midas printer	2026-05-14 02:05:59 +02:00
LordBaryhobal	6d885a0449	feat(parser): use AST printer in test script	2026-05-14 01:12:05 +02:00
LordBaryhobal	052339ad3a	refactor(parser): improve AST printer refactored the messy AST printer impletation with Claude to use a context manager, an enum and extract common functions Co-authored-by: Claude <noreply@anthropic.com>	2026-05-14 01:11:37 +02:00
LordBaryhobal	c420e5e254	feat(parser): add an annotation AST printer	2026-05-14 00:55:31 +02:00
LordBaryhobal	721ed812df	feat(parser): add a basic annotation parser	2026-05-14 00:07:46 +02:00
LordBaryhobal	8252f452f2	feat(parser): add base parser class the parser was adapted from another project (see docstring on the Parser class)	2026-05-13 22:40:27 +02:00
LordBaryhobal	cc4b5dabf2	feat(parser): add midas lexer to test script	2026-05-13 22:40:26 +02:00
LordBaryhobal	1fc842e23f	feat(parser): add basic lexer for type definitions	2026-05-13 22:40:26 +02:00
LordBaryhobal	fcbea218a4	feat(parser): add a test script for the annotation lexer	2026-05-13 22:40:26 +02:00
LordBaryhobal	10ee4991c3	feat(parser): add a basic lexer for annotations	2026-05-13 22:40:25 +02:00
LordBaryhobal	fedc582e16	feat(parser): add base lexer class the lexer and token structures were adapted from another project (see docstring on the Lexer class)	2026-05-13 22:40:19 +02:00
LordBaryhobal	9b59306604	feat: add vscode extension for basic syntax highlighting	2026-05-13 14:38:01 +02:00
LordBaryhobal	3cf3011160	feat: add some syntax examples	2026-05-13 14:36:51 +02:00