Compare commits

40 Commits

Author SHA1 Message Date
7a831a1afc fix(parser): handle extra tokens in Midas parser 2026-05-18 18:43:35 +02:00
6922e49cdf tests(parser): add tests for midas parser 2026-05-18 18:43:03 +02:00
ae02bab030 tests(parser): add tests for annotation parser 2026-05-18 14:42:27 +02:00
7581a35be4 tests(parser): add syntax error test 2026-05-18 13:43:12 +02:00
6f0c0ce326 tests(parser): add literal value test 2026-05-18 13:35:23 +02:00
1b1fbb834e fix(parser): fix bang equal
consume equal token when matching bang-equal
2026-05-18 13:22:11 +02:00
f41c757838 tests(parser): complete simple annotation lexer tests 2026-05-18 13:21:23 +02:00
453c72af6d tests(parser): add basic lexer test
add a basic test for the annotation lexer to check punctuation tokens
2026-05-18 13:11:00 +02:00
ee308fe223 fix(parser): update examples 2026-05-18 12:47:21 +02:00
3f199ff134 feat(parser): add Midas pretty printer 2026-05-18 12:43:05 +02:00
74ac9c5381 feat(parser): parse Midas type constraints 2026-05-18 12:18:41 +02:00
e10d71a66b feat(parser): split annotation and Midas keywords 2026-05-18 11:33:33 +02:00
8bc0918517 feat(parser): parse annotation type constraints 2026-05-18 11:27:52 +02:00
be50a8db35 feat(parser): add operators to annotations lexer 2026-05-18 11:02:48 +02:00
340bcc65fd feat(parser): update annotation printers 2026-05-18 11:02:06 +02:00
cbf0f2852e feat(parser): add AnnotationStmt and ConstraintExpr 2026-05-18 11:01:39 +02:00
64d96bd94e feat: add Midas railroad diagrams 2026-05-18 10:23:53 +02:00
a3ba0ef35d feat: add Midas EBNF 2026-05-18 10:23:45 +02:00
903179832e feat: add annotations railroad diagrams 2026-05-18 10:02:10 +02:00
4fe495620b feat: add annotations EBNF 2026-05-18 09:51:17 +02:00
539084f6d8 feat(parser): add documentation to Midas parser 2026-05-18 09:17:57 +02:00
5831906f26 feat(parser): add documentation to annotation parser 2026-05-18 08:54:23 +02:00
61b36ee50f feat(parser): parse constraint statements 2026-05-14 02:44:21 +02:00
4b715ed33a feat(parser): use midas parser in test script 2026-05-14 02:38:04 +02:00
0af31a6f85 feat(parser): parse op statements 2026-05-14 02:37:50 +02:00
3b40abaa2b feat(parser): parse type body 2026-05-14 02:29:08 +02:00
6482e06bca feat(parser): add base Midas parser (incomplete) 2026-05-14 02:19:50 +02:00
4d25b43a4e fix(parser): prepare printer for midas printer 2026-05-14 02:05:59 +02:00
6d885a0449 feat(parser): use AST printer in test script 2026-05-14 01:12:05 +02:00
052339ad3a refactor(parser): improve AST printer
refactored the messy AST printer implementation with Claude to use a context manager, an enum and extract common functions

Co-authored-by: Claude <noreply@anthropic.com>
2026-05-14 01:11:37 +02:00
c420e5e254 feat(parser): add an annotation AST printer 2026-05-14 00:55:31 +02:00
721ed812df feat(parser): add a basic annotation parser 2026-05-14 00:07:46 +02:00
8252f452f2 feat(parser): add base parser class
the parser was adapted from another project (see docstring on the Parser class)
2026-05-13 22:40:27 +02:00
cc4b5dabf2 feat(parser): add midas lexer to test script 2026-05-13 22:40:26 +02:00
1fc842e23f feat(parser): add basic lexer for type definitions 2026-05-13 22:40:26 +02:00
fcbea218a4 feat(parser): add a test script for the annotation lexer 2026-05-13 22:40:26 +02:00
10ee4991c3 feat(parser): add a basic lexer for annotations 2026-05-13 22:40:25 +02:00
fedc582e16 feat(parser): add base lexer class
the lexer and token structures were adapted from another project (see docstring on the Lexer class)
2026-05-13 22:40:19 +02:00
9b59306604 feat: add vscode extension for basic syntax highlighting 2026-05-13 14:38:01 +02:00
3cf3011160 feat: add some syntax examples 2026-05-13 14:36:51 +02:00
29 changed files with 2643 additions and 0 deletions

6
.gitignore vendored
View File

@@ -0,0 +1,6 @@
.vscode
__pycache__
.env
venv
.venv
*.pyc

107
core/ast/annotations.py Normal file
View File

@@ -0,0 +1,107 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Generic, Optional, TypeVar
from lexer.token import Token
T = TypeVar("T")
@dataclass(frozen=True)
class Stmt(ABC):
    """Abstract base class of the annotation statement nodes.

    Concrete statements implement `accept` to dispatch themselves to the
    matching `visit_*` method of a `Stmt.Visitor`.
    """

    # Double dispatch entry point: returns whatever the visitor method returns
    @abstractmethod
    def accept(self, visitor: Visitor[T]) -> T: ...

    class Visitor(ABC, Generic[T]):
        """Visitor interface over annotation statements, generic in its result type."""

        @abstractmethod
        def visit_annotation_stmt(self, stmt: AnnotationStmt) -> T: ...
@dataclass(frozen=True)
class AnnotationStmt(Stmt):
    """Statement node for an annotation: a name with an optional schema."""

    # Token holding the annotation's name
    name: Token
    # Schema attached to the name, or None when there is none
    schema: Optional[SchemaExpr]

    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_annotation_stmt(self)
@dataclass(frozen=True)
class Expr(ABC):
    """Abstract base class of the annotation expression nodes.

    Concrete expressions implement `accept` to dispatch themselves to the
    matching `visit_*` method of an `Expr.Visitor`.
    """

    # Double dispatch entry point: returns whatever the visitor method returns
    @abstractmethod
    def accept(self, visitor: Visitor[T]) -> T: ...

    class Visitor(ABC, Generic[T]):
        """Visitor interface over annotation expressions, generic in its result type.

        There is one abstract `visit_*` method per concrete expression node.
        """

        @abstractmethod
        def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ...

        @abstractmethod
        def visit_literal_expr(self, expr: LiteralExpr) -> T: ...

        @abstractmethod
        def visit_type_expr(self, expr: TypeExpr) -> T: ...

        @abstractmethod
        def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ...

        @abstractmethod
        def visit_schema_expr(self, expr: SchemaExpr) -> T: ...

        @abstractmethod
        def visit_schema_element_expr(self, expr: SchemaElementExpr) -> T: ...
@dataclass(frozen=True)
class WildcardExpr(Expr):
    """Expression node for a wildcard."""

    # Token associated with the wildcard
    token: Token

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_wildcard_expr(self)
@dataclass(frozen=True)
class LiteralExpr(Expr):
    """Expression node wrapping a literal value."""

    # The literal value itself (any Python object)
    value: Any

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_literal_expr(self)
@dataclass(frozen=True)
class TypeExpr(Expr):
    """Expression node for a type name with zero or more constraints."""

    # Token holding the type name
    name: Token
    # Constraints attached to the type, in source order (may be empty)
    constraints: list[ConstraintExpr]

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_type_expr(self)
@dataclass(frozen=True)
class ConstraintExpr(Expr):
    """Expression node for a binary constraint: `left op right`."""

    # Left operand
    left: Expr
    # Operator token
    op: Token
    # Right operand
    right: Expr

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_constraint_expr(self)
@dataclass(frozen=True)
class SchemaExpr(Expr):
    """Expression node for a schema: a delimited list of elements."""

    # Token preceding the elements (presumably the opening bracket — confirm against the parser)
    left: Token
    # Schema elements, in source order
    elements: list[Expr]
    # Token following the elements (presumably the closing bracket — confirm against the parser)
    right: Token

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_schema_expr(self)
@dataclass(frozen=True)
class SchemaElementExpr(Expr):
    """Expression node for one schema element; both its name and type are optional."""

    # Token holding the element name, or None for an unnamed element
    name: Optional[Token]
    # Type of the element, or None for an untyped element
    type: Optional[Expr]

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_schema_element_expr(self)

138
core/ast/midas.py Normal file
View File

@@ -0,0 +1,138 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Any, Generic, Optional, TypeVar
from lexer.token import Token
T = TypeVar("T")
# Statements
@dataclass(frozen=True)
class Stmt(ABC):
    """Abstract base class of the Midas statement nodes.

    Concrete statements implement `accept` to dispatch themselves to the
    matching `visit_*` method of a `Stmt.Visitor`.
    """

    # Double dispatch entry point: returns whatever the visitor method returns
    @abstractmethod
    def accept(self, visitor: Visitor[T]) -> T: ...

    class Visitor(ABC, Generic[T]):
        """Visitor interface over Midas statements, generic in its result type.

        There is one abstract `visit_*` method per concrete statement node.
        """

        @abstractmethod
        def visit_type_stmt(self, stmt: TypeStmt) -> T: ...

        @abstractmethod
        def visit_property_stmt(self, stmt: PropertyStmt) -> T: ...

        @abstractmethod
        def visit_op_stmt(self, stmt: OpStmt) -> T: ...

        @abstractmethod
        def visit_constraint_stmt(self, stmt: ConstraintStmt) -> T: ...
@dataclass(frozen=True)
class TypeStmt(Stmt):
    """Statement node for a type definition: a name, its base types and an optional body."""

    # Token holding the name of the defined type
    name: Token
    # Base type expressions of the defined type
    bases: list[TypeExpr]
    # Body of the definition, or None when the type has no body
    body: Optional[TypeBodyExpr]

    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_type_stmt(self)
@dataclass(frozen=True)
class PropertyStmt(Stmt):
    """Statement node for a property: a name and its type."""

    # Token holding the property name
    name: Token
    # Type of the property
    type: TypeExpr

    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_property_stmt(self)
@dataclass(frozen=True)
class OpStmt(Stmt):
    """Statement node for an operation definition: two operand types, an operator and a result type."""

    # Type of the left operand
    left: TypeExpr
    # Operator token
    op: Token
    # Type of the right operand
    right: TypeExpr
    # Type of the result
    result: TypeExpr

    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_op_stmt(self)
@dataclass(frozen=True)
class ConstraintStmt(Stmt):
    """Statement node for a named constraint definition."""

    # Token holding the constraint name
    name: Token
    # The constraint expression bound to the name
    constraint: ConstraintExpr

    def accept(self, visitor: Stmt.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_constraint_stmt(self)
# Expressions
@dataclass(frozen=True)
class Expr(ABC):
    """Abstract base class of the Midas expression nodes.

    Concrete expressions implement `accept` to dispatch themselves to the
    matching `visit_*` method of an `Expr.Visitor`.
    """

    # Double dispatch entry point: returns whatever the visitor method returns
    @abstractmethod
    def accept(self, visitor: Visitor[T]) -> T: ...

    class Visitor(ABC, Generic[T]):
        """Visitor interface over Midas expressions, generic in its result type.

        There is one abstract `visit_*` method per concrete expression node.
        """

        @abstractmethod
        def visit_wildcard_expr(self, expr: WildcardExpr) -> T: ...

        @abstractmethod
        def visit_literal_expr(self, expr: LiteralExpr) -> T: ...

        @abstractmethod
        def visit_type_expr(self, expr: TypeExpr) -> T: ...

        @abstractmethod
        def visit_constraint_expr(self, expr: ConstraintExpr) -> T: ...

        @abstractmethod
        def visit_type_body_expr(self, expr: TypeBodyExpr) -> T: ...
@dataclass(frozen=True)
class WildcardExpr(Expr):
    """Expression node for a wildcard."""

    # Token associated with the wildcard
    token: Token

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_wildcard_expr(self)
@dataclass(frozen=True)
class LiteralExpr(Expr):
    """Expression node wrapping a literal value."""

    # The literal value itself (any Python object)
    value: Any

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_literal_expr(self)
@dataclass(frozen=True)
class TypeExpr(Expr):
    """Expression node for a type name with zero or more constraints."""

    # Token holding the type name
    name: Token
    # Constraints attached to the type, in source order (may be empty)
    constraints: list[ConstraintExpr]

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_type_expr(self)
@dataclass(frozen=True)
class ConstraintExpr(Expr):
    """Expression node for a binary constraint: `left op right`."""

    # Left operand
    left: Expr
    # Operator token
    op: Token
    # Right operand
    right: Expr

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_constraint_expr(self)
@dataclass(frozen=True)
class TypeBodyExpr(Expr):
    """Expression node for the body of a type definition: a list of properties."""

    # Properties declared in the body, in source order
    properties: list[PropertyStmt]

    def accept(self, visitor: Expr.Visitor[T]) -> T:
        # Route to the visitor method dedicated to this node type
        return visitor.visit_type_body_expr(self)

360
core/ast/printer.py Normal file
View File

@@ -0,0 +1,360 @@
from __future__ import annotations
from contextlib import contextmanager
from enum import Enum, auto
import io
from typing import Generator, Generic, Optional, Protocol, TypeVar
import core.ast.annotations as a
import core.ast.midas as m
class _Level(Enum):
    """Drawing state of one nesting level of the tree being printed."""

    # All children of this level were drawn: only padding is emitted for it
    EMPTY = auto()
    # More children are expected: a vertical guide is emitted for it
    ACTIVE = auto()
    # The next line written at this level is its last child
    LAST = auto()


class Expr(Protocol):
    """Structural type of any node that can be handed to an `AstPrinter`."""

    def accept(self, printer: AstPrinter) -> None: ...


T = TypeVar("T", bound=Expr)


class AstPrinter(Generic[T]):
    """Base class for printers rendering an AST as an indented tree.

    Subclasses implement the `visit_*` methods of the node visitors and use
    `_write_line`, `_child_level`, `_mark_last` and `_write_optional_child`
    to emit lines; this class only manages indentation and buffering.
    """

    # Every prefix fragment is 4 columns wide so nested levels line up
    LAST_CHILD = "└── "
    CHILD = "├── "
    VERTICAL = "│   "
    EMPTY = "    "

    def __init__(self):
        # One entry per currently open nesting level, outermost first
        self._levels: list[_Level] = []
        # Index to prefix the next written line with (e.g. "[0] "), if any
        self._idx: Optional[int] = None
        self._buf: io.StringIO = io.StringIO()

    def print(self, expr: T) -> str:
        """Render the given node and return the resulting text.

        Args:
            expr (T): the root node to print

        Returns:
            str: the tree representation, one node per line
        """
        # Start from a clean state so a previous (possibly aborted) run
        # cannot leak indentation levels or a pending index into this one
        self._levels = []
        self._idx = None
        self._buf = io.StringIO()
        expr.accept(self)
        return self._buf.getvalue()

    @contextmanager
    def _child_level(self, last: bool = False) -> Generator[None, None, None]:
        """Open a nesting level for the duration of the `with` block

        Args:
            last (bool, optional): whether the first line written in the new
                level is also its last child. Defaults to False.
        """
        self._levels.append(_Level.LAST if last else _Level.ACTIVE)
        try:
            yield
        finally:
            # Always close the level, even if a visit method raised
            self._levels.pop()

    def _mark_last(self):
        """Flag the next line of the innermost level as its last child"""
        if self._levels:
            self._levels[-1] = _Level.LAST

    def _write_line(self, text: str, *, last: bool = False):
        """Write one line at the current nesting level

        Args:
            text (str): the text of the line, without indentation
            last (bool, optional): whether this line is the last child of the
                current level. Defaults to False.
        """
        if last:
            self._mark_last()
        indent: str = self._build_indent()
        if self._idx is not None:
            # A pending index is consumed by the first line written after it was set
            text = f"[{self._idx}] {text}"
            self._idx = None
        self._buf.write(indent + text + "\n")

    def _build_indent(self) -> str:
        """Build the prefix of the next line from the open nesting levels

        Returns:
            str: padding/guides for the outer levels followed by the
                connector of the innermost level
        """
        parts: list[str] = [
            self.EMPTY if level == _Level.EMPTY else self.VERTICAL
            for level in self._levels[:-1]
        ]
        if self._levels:
            if self._levels[-1] == _Level.LAST:
                parts.append(self.LAST_CHILD)
                # Once the last child is drawn, deeper lines only need padding
                self._levels[-1] = _Level.EMPTY
            else:
                parts.append(self.CHILD)
        return "".join(parts)

    def _write_optional_child(
        self, label: str, child: Optional[T], *, last: bool = False
    ):
        """Write a labelled child which may be absent

        An absent child is written as `label: None`; otherwise the label is
        written on its own line and the child is printed one level deeper.

        Args:
            label (str): the label of the child
            child (Optional[T]): the child node, if any
            last (bool, optional): whether this is the last child of the
                current level. Defaults to False.
        """
        if last:
            self._mark_last()
        if child is None:
            self._write_line(f"{label}: None")
        else:
            self._write_line(label)
            with self._child_level(last=True):
                child.accept(self)
class AnnotationAstPrinter(AstPrinter, a.Expr.Visitor[None], a.Stmt.Visitor[None]):
    """Tree printer for annotation AST nodes

    Each `visit_*` method writes the node name followed by its fields, one
    nesting level deeper.
    """

    def _accept_indexed(self, nodes: list) -> None:
        """Print every node of a list at the current level, prefixed with its index"""
        final: int = len(nodes) - 1
        for position, node in enumerate(nodes):
            self._idx = position
            if position == final:
                self._mark_last()
            node.accept(self)

    def _accept_sole_child(self, node) -> None:
        """Print a node as the only child of a new nesting level"""
        with self._child_level(last=True):
            node.accept(self)

    def visit_annotation_stmt(self, stmt: a.AnnotationStmt) -> None:
        self._write_line("AnnotationStmt")
        with self._child_level():
            self._write_line(f'name: "{stmt.name.lexeme}"')
            self._write_optional_child("schema", stmt.schema, last=True)

    def visit_type_expr(self, expr: a.TypeExpr):
        self._write_line("TypeExpr")
        with self._child_level():
            self._write_line(f'name: "{expr.name.lexeme}"')
            self._write_line("constraints", last=True)
            with self._child_level():
                self._accept_indexed(expr.constraints)

    def visit_constraint_expr(self, expr: a.ConstraintExpr) -> None:
        self._write_line("ConstraintExpr")
        with self._child_level():
            self._write_line("left")
            self._accept_sole_child(expr.left)
            self._write_line(f"operator: {expr.op.lexeme}")
            self._write_line("right", last=True)
            self._accept_sole_child(expr.right)

    def visit_schema_expr(self, expr: a.SchemaExpr):
        self._write_line("SchemaExpr")
        with self._child_level():
            self._accept_indexed(expr.elements)

    def visit_schema_element_expr(self, expr: a.SchemaElementExpr):
        self._write_line("SchemaElementExpr")
        with self._child_level():
            if expr.name is None:
                name_text: str = "None"
            else:
                name_text = f'"{expr.name.lexeme}"'
            self._write_line(f"name: {name_text}")
            self._write_optional_child("type", expr.type, last=True)

    def visit_wildcard_expr(self, expr: a.WildcardExpr) -> None:
        self._write_line("WildcardExpr")

    def visit_literal_expr(self, expr: a.LiteralExpr) -> None:
        self._write_line("LiteralExpr")
        with self._child_level():
            self._write_line(f"value: {expr.value}", last=True)
class AnnotationPrinter(a.Expr.Visitor[str], a.Stmt.Visitor[str]):
    """Pretty printer turning annotation AST nodes back into source-like text"""

    def print(self, expr: a.Expr | a.Stmt):
        """Render the given node as a string"""
        return expr.accept(self)

    def visit_annotation_stmt(self, stmt: a.AnnotationStmt) -> str:
        # The schema part is simply omitted when the annotation has none
        schema: str = "" if stmt.schema is None else stmt.schema.accept(self)
        return f"{stmt.name.lexeme}{schema}"

    def visit_type_expr(self, expr: a.TypeExpr) -> str:
        # The type name first, then each constraint wrapped in parentheses
        pieces: list[str] = [expr.name.lexeme]
        pieces.extend(f"({constraint.accept(self)})" for constraint in expr.constraints)
        return " + ".join(pieces)

    def visit_constraint_expr(self, expr: a.ConstraintExpr) -> str:
        left: str = expr.left.accept(self)
        operator: str = expr.op.lexeme
        right: str = expr.right.accept(self)
        return " ".join((left, operator, right))

    def visit_schema_expr(self, expr: a.SchemaExpr) -> str:
        opening: str = expr.left.lexeme
        body: str = ", ".join(elmt.accept(self) for elmt in expr.elements)
        return opening + body + expr.right.lexeme

    def visit_schema_element_expr(self, expr: a.SchemaElementExpr) -> str:
        name_parts: list[str] = [] if expr.name is None else [expr.name.lexeme]
        # An untyped element is rendered with the wildcard symbol
        type_text: str = "_" if expr.type is None else expr.type.accept(self)
        return ": ".join([*name_parts, type_text])

    def visit_wildcard_expr(self, expr: a.WildcardExpr) -> str:
        return "_"

    def visit_literal_expr(self, expr: a.LiteralExpr) -> str:
        return str(expr.value)
class MidasAstPrinter(AstPrinter, m.Expr.Visitor[None], m.Stmt.Visitor[None]):
    """Tree printer for Midas AST nodes

    Each `visit_*` method writes the node name followed by its fields, one
    nesting level deeper.
    """

    def _accept_indexed(self, nodes: list) -> None:
        """Print every node of a list at the current level, prefixed with its index"""
        final: int = len(nodes) - 1
        for position, node in enumerate(nodes):
            self._idx = position
            if position == final:
                self._mark_last()
            node.accept(self)

    def _accept_sole_child(self, node) -> None:
        """Print a node as the only child of a new nesting level"""
        with self._child_level(last=True):
            node.accept(self)

    def visit_type_stmt(self, stmt: m.TypeStmt):
        self._write_line("TypeStmt")
        with self._child_level():
            self._write_line(f'name: "{stmt.name.lexeme}"')
            self._write_line("bases")
            with self._child_level():
                self._accept_indexed(stmt.bases)
            self._write_optional_child("body", stmt.body, last=True)

    def visit_property_stmt(self, stmt: m.PropertyStmt):
        self._write_line("PropertyStmt")
        with self._child_level():
            self._write_line(f'name: "{stmt.name.lexeme}"')
            self._write_line("type", last=True)
            self._accept_sole_child(stmt.type)

    def visit_op_stmt(self, stmt: m.OpStmt) -> None:
        self._write_line("OpStmt")
        with self._child_level():
            self._write_line("left")
            self._accept_sole_child(stmt.left)
            self._write_line(f'op: "{stmt.op.lexeme}"')
            self._write_line("right")
            self._accept_sole_child(stmt.right)
            self._write_line("result", last=True)
            self._accept_sole_child(stmt.result)

    def visit_constraint_stmt(self, stmt: m.ConstraintStmt):
        self._write_line("ConstraintStmt")
        with self._child_level():
            self._write_line(f'name: "{stmt.name.lexeme}"')
            self._write_line("constraint", last=True)
            self._accept_sole_child(stmt.constraint)

    def visit_type_expr(self, expr: m.TypeExpr):
        self._write_line("TypeExpr")
        with self._child_level():
            self._write_line(f'name: "{expr.name.lexeme}"')
            self._write_line("constraints", last=True)
            with self._child_level():
                self._accept_indexed(expr.constraints)

    def visit_constraint_expr(self, expr: m.ConstraintExpr):
        self._write_line("ConstraintExpr")
        with self._child_level():
            self._write_line("left")
            self._accept_sole_child(expr.left)
            self._write_line(f"operator: {expr.op.lexeme}")
            self._write_line("right", last=True)
            self._accept_sole_child(expr.right)

    def visit_type_body_expr(self, expr: m.TypeBodyExpr):
        self._write_line("TypeBodyExpr")
        with self._child_level():
            self._write_line("properties", last=True)
            with self._child_level():
                self._accept_indexed(expr.properties)

    def visit_wildcard_expr(self, expr: m.WildcardExpr) -> None:
        self._write_line("WildcardExpr")

    def visit_literal_expr(self, expr: m.LiteralExpr) -> None:
        self._write_line("LiteralExpr")
        with self._child_level():
            self._write_line(f"value: {expr.value}", last=True)
class MidasPrinter(m.Expr.Visitor[str], m.Stmt.Visitor[str]):
    """Pretty printer turning Midas AST nodes back into source-like text"""

    def __init__(self, indent: int = 4):
        """Create a printer

        Args:
            indent (int, optional): number of spaces per indentation level. Defaults to 4.
        """
        self.indent: int = indent
        self.level: int = 0

    def indented(self, text: str) -> str:
        """Prefix the text with the indentation of the current level"""
        padding: str = " " * (self.level * self.indent)
        return padding + text

    def print(self, expr: m.Expr | m.Stmt):
        """Render the given node as a string, starting at indentation level 0"""
        self.level = 0
        return expr.accept(self)

    def visit_type_stmt(self, stmt: m.TypeStmt):
        base_list: str = ", ".join([base.accept(self) for base in stmt.bases])
        header: str = self.indented(f"type {stmt.name.lexeme}<{base_list}>")
        if stmt.body is None:
            return header
        # The body is rendered one level deeper than the `type` line
        self.level += 1
        body: str = stmt.body.accept(self)
        self.level -= 1
        closing: str = self.indented("}")
        return header + " {\n" + body + "\n" + closing

    def visit_property_stmt(self, stmt: m.PropertyStmt):
        return f"{stmt.name.lexeme}: {stmt.type.accept(self)}"

    def visit_op_stmt(self, stmt: m.OpStmt):
        # Operands are rendered in source order: left, operator, right, result
        line: str = (
            f"op <{stmt.left.accept(self)}> {stmt.op.lexeme} "
            f"<{stmt.right.accept(self)}> = <{stmt.result.accept(self)}>"
        )
        return self.indented(line)

    def visit_constraint_stmt(self, stmt: m.ConstraintStmt):
        line: str = f"constraint {stmt.name.lexeme} = {stmt.constraint.accept(self)}"
        return self.indented(line)

    def visit_type_expr(self, expr: m.TypeExpr):
        # The type name first, then each constraint wrapped in parentheses
        pieces: list[str] = [expr.name.lexeme]
        pieces.extend(f"({constraint.accept(self)})" for constraint in expr.constraints)
        return " + ".join(pieces)

    def visit_constraint_expr(self, expr: m.ConstraintExpr):
        left: str = expr.left.accept(self)
        operator: str = expr.op.lexeme
        right: str = expr.right.accept(self)
        return " ".join((left, operator, right))

    def visit_type_body_expr(self, expr: m.TypeBodyExpr):
        # One indented line per property
        return "\n".join([self.indented(prop.accept(self)) for prop in expr.properties])

    def visit_wildcard_expr(self, expr: m.WildcardExpr):
        return "_"

    def visit_literal_expr(self, expr: m.LiteralExpr):
        return str(expr.value)

View File

@@ -0,0 +1,16 @@
# type: ignore
# ruff: disable[F821]
from __future__ import annotations
# A simple data-frame with different columns of various simple types
# Columns can be named and/or typed
df: Frame[
verified: bool,
birth_year: int,
height: float + ( _ > 0 ) + ( _ < 250 ),
name: str,
date: datetime,
float, # unnamed
unknown: _, # untyped
_ # unnamed and untyped
]

View File

@@ -0,0 +1,24 @@
// Simple custom type derived from floats
type Latitude<float>
type Longitude<float>
// Complex custom type, containing two values accessible through properties
type GeoLocation<Latitude, Longitude> {
lat: Latitude
lon: Longitude
}
type LatitudeDiff<float>
type LongitudeDiff<float>
// Simple operation defined on our custom types
op <Latitude> - <Latitude> = <LatitudeDiff>
op <Longitude> - <Longitude> = <LongitudeDiff>
// Simple custom type with a constraint
type Age<int + (0 <= _) + (_ < 150)>
// Predefined custom constraints that can be referenced in other definitions
constraint Positive = _ >= 0
constraint StrictlyPositive = _ > 0
//constraint Even = _ % 2 == 0

View File

@@ -0,0 +1,34 @@
# type: ignore
# ruff: disable[F821]
from __future__ import annotations
# Prototype of custom type import to use valid Python syntax
import midas
midas.using("02_custom_types.midas")
# A data-frame using a custom type
df: Frame[
location: GeoLocation
]
# Properties of a type can be used on a column of that type
lat: Column[GeoLocation] = df["location"].lat
lon: Column[GeoLocation] = df["location"].lon
# Unregistered operations between types are not permitted
lat + lon # Invalid operation
# Registered operations are permitted
lat1: Latitude = lat[0]
lat2: Latitude = lat[1]
lat_diff: LatitudeDiff = lat2 - lat1 # Valid operation
# In addition to the type, a column can have one or more constraints, either defined inline or in a separate file
df2: Frame[
age: int + (_ >= 0),
height: float + (_ >= 0),
]
df2_bis: Frame[
age: int + Positive,
height: float + Positive,
]

0
lexer/__init__.py Normal file
View File

102
lexer/annotations.py Normal file
View File

@@ -0,0 +1,102 @@
from lexer.base import Lexer
from lexer.keyword import ANNOTATION_KEYWORDS
from lexer.token import TokenType
class AnnotationLexer(Lexer):
    """Lexer producing tokens out of annotation source text"""

    # Characters that always produce the same token on their own
    _SINGLE_CHAR_TOKENS: dict[str, TokenType] = {
        "(": TokenType.LEFT_PAREN,
        ")": TokenType.RIGHT_PAREN,
        "[": TokenType.LEFT_BRACKET,
        "]": TokenType.RIGHT_BRACKET,
        ":": TokenType.COLON,
        ",": TokenType.COMMA,
        "_": TokenType.UNDERSCORE,
        "+": TokenType.PLUS,
        "\n": TokenType.NEWLINE,
    }

    # Characters producing one token when followed by "=" and another otherwise,
    # stored as (token with "=", token alone)
    _EQUAL_SUFFIXED_TOKENS: dict[str, tuple[TokenType, TokenType]] = {
        "<": (TokenType.LESS_EQUAL, TokenType.LESS),
        ">": (TokenType.GREATER_EQUAL, TokenType.GREATER),
        "=": (TokenType.EQUAL_EQUAL, TokenType.EQUAL),
    }

    def scan_token(self) -> None:
        """Consume the current character and add the token(s) it starts"""
        char: str = self.advance()

        single = self._SINGLE_CHAR_TOKENS.get(char)
        if single is not None:
            self.add_token(single)
        elif char in self._EQUAL_SUFFIXED_TOKENS:
            with_equal, alone = self._EQUAL_SUFFIXED_TOKENS[char]
            self.add_token(with_equal if self.match("=") else alone)
        elif char == "!":
            if self.match("="):
                self.add_token(TokenType.BANG_EQUAL)
            else:
                self.error("Unexpected single bang. Did you mean '!=' ?")
        elif char == "#":
            self.scan_comment()
        elif char in (" ", "\r", "\t"):
            # Consume all whitespace characters until EOL or EOF
            while (
                not self.is_at_end()
                and self.peek() != "\n"
                and self.peek().isspace()
            ):
                self.advance()
            self.add_token(TokenType.WHITESPACE)
        elif char.isdigit():
            self.scan_number()
        elif char.isalpha():
            self.scan_identifier()
        else:
            self.error("Unexpected character")
        return None

    def _consume_digits(self) -> None:
        """Advance past a (possibly empty) run of digits"""
        while self.peek().isdigit():
            self.advance()

    def scan_number(self):
        """Scan the rest of number and add it as a token

        Integers and floats are both handled, and both are stored as a
        `float` value. Scientific notation and base prefixes (0x, 0b, 0o)
        are not supported
        """
        self._consume_digits()
        # A dot only belongs to the number when a digit follows it
        if self.peek() == "." and self.peek_next().isdigit():
            self.advance()
            self._consume_digits()
        text: str = self.source[self.start : self.idx]
        self.add_token(TokenType.NUMBER, float(text))

    def scan_identifier(self):
        """Scan the rest of an identifier and add it as a token

        An identifier starts with a letter, followed by any number of
        alphanumerical characters or underscores. Lexemes listed in
        `ANNOTATION_KEYWORDS` get the matching keyword token type instead
        """
        while self.peek() == "_" or self.peek().isalnum():
            self.advance()
        word: str = self.source[self.start : self.idx]
        self.add_token(ANNOTATION_KEYWORDS.get(word, TokenType.IDENTIFIER))

    def scan_comment(self):
        """Scan the rest of a comment and add it as a token

        A comment starts with a `#` character and ends at the EOL/EOF
        """
        while not (self.peek() == "\n" or self.is_at_end()):
            self.advance()
        self.add_token(TokenType.COMMENT)

166
lexer/base.py Normal file
View File

@@ -0,0 +1,166 @@
from abc import ABC, abstractmethod
from typing import Any, Callable, Optional
from lexer.position import Position
from lexer.token import Token, TokenType
class Lexer(ABC):
    """An abstract lexer which provides methods to easily extend it into a concrete one

    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)

    [1]: https://craftinginterpreters.com/
    """

    def __init__(self, source: str, file: Optional[str] = None) -> None:
        """Create a new lexer to scan for tokens in the given source

        Args:
            source (str): the source to scan
            file (Optional[str], optional): the path of the given source. Can be a file path or any string identifier. Defaults to None.
        """
        self.source: str = source
        self.file: Optional[str] = file
        self.tokens: list[Token] = []
        # Index of the first character of the lexeme currently being scanned
        self.start: int = 0
        # Index of the next character to consume
        self.idx: int = 0
        self.length: int = len(self.source)
        # 1-based position of the next character to consume
        self.line: int = 1
        self.column: int = 1
        # Position of the first character of the lexeme currently being scanned
        self.start_pos: Position = self.get_position()

    def error(self, msg: str):
        """Raise a syntax error

        The reported position is the start of the lexeme being scanned

        Args:
            msg (str): the error message

        Raises:
            SyntaxError
        """
        raise SyntaxError(f"[ERROR] Error at {self.start_pos}: {msg}")

    def process(self) -> list[Token]:
        """Scan tokens out of the source text

        An EOF token is appended after the scanned tokens

        Returns:
            list[Token]: all the tokens that could be scanned

        Raises:
            SyntaxError: if a syntax error is found
        """
        self.scan_tokens()
        self.tokens.append(Token(TokenType.EOF, "", None, self.get_position()))
        return self.tokens

    def is_at_end(self) -> bool:
        """Whether the lexer is at the end of the source

        Returns:
            bool: True if the current index is at the end of the source
        """
        return self.idx >= self.length

    def get_position(self) -> Position:
        """Get the current position

        Returns:
            Position: the current position
        """
        return Position(file=self.file, line=self.line, column=self.column)

    def peek(self) -> str:
        """Get the current character without advancing, if any

        Returns:
            str: the current character, or an empty string if at EOF
        """
        if self.idx < self.length:
            return self.source[self.idx]
        return ""

    def peek_next(self) -> str:
        """Get the next character without advancing, if any

        Returns:
            str: the next character, or an empty string if at EOF
        """
        if self.idx + 1 < self.length:
            return self.source[self.idx + 1]
        return ""

    def advance(self) -> str:
        """Get the new character and advance

        At EOF nothing is consumed: the cursor and the line/column counters
        are left untouched so they never drift past the end of the source

        Returns:
            str: the current character, before advancing, or an empty string if at EOF
        """
        char: str = self.peek()
        if not char:
            # `peek` only returns an empty string at EOF
            return char
        self.idx += 1
        self.column += 1
        if char == "\n":
            self.newline()
        return char

    def newline(self):
        """Update the current position after encountering a newline character"""
        self.line += 1
        self.column = 1

    def match(self, expected: str) -> bool:
        """Consume the next character if it matches the given value

        Args:
            expected (str): the expected character

        Returns:
            bool: whether a character was matched and consumed
        """
        if self.peek() == expected:
            self.advance()
            return True
        return False

    def update_start(self):
        """Update the starting position of the current lexeme

        The cursor marking the start of the lexeme currently being scanned is
        moved to the current position
        """
        self.start_pos = self.get_position()
        self.start = self.idx

    def add_token(self, token_type: TokenType, value: Optional[Any] = None):
        """Add the current lexeme to the list of scanned tokens

        The lexeme spans from the start cursor up to (excluding) the current index

        Args:
            token_type (TokenType): the type of token to add
            value (Optional[Any], optional): the value of the token (useful for numbers or constants). Defaults to None.
        """
        lexeme: str = self.source[self.start : self.idx]
        self.tokens.append(
            Token(position=self.start_pos, type=token_type, lexeme=lexeme, value=value)
        )

    def scan_tokens(self, condition: Optional[Callable[[], bool]] = None):
        """Scan tokens until EOF is reached or the given condition becomes False

        Args:
            condition (Optional[Callable[[], bool]], optional): the condition to continue scanning tokens.
                If None, defaults to always being True, effectively scanning tokens until EOF is reached. Defaults to None.
        """
        # The condition (when given) is checked before the EOF test on every iteration
        while (condition is None or condition()) and not self.is_at_end():
            self.update_start()
            self.scan_token()

    @abstractmethod
    def scan_token(self) -> None:
        """Scan a token

        This function should (at least) consume the current character and produce the appropriate token(s), using `add_token`
        """
        pass

16
lexer/keyword.py Normal file
View File

@@ -0,0 +1,16 @@
from lexer.token import TokenType
# Reserved words of the annotation language, mapped to their token type.
# Any identifier lexeme not listed here is a plain identifier.
ANNOTATION_KEYWORDS: dict[str, TokenType] = {
    "True": TokenType.TRUE,
    "False": TokenType.FALSE,
    "None": TokenType.NONE,
}

# Reserved words of the Midas language, mapped to their token type.
# Note the lowercase spelling of the constants, unlike the annotation keywords.
MIDAS_KEYWORDS: dict[str, TokenType] = {
    "type": TokenType.TYPE,
    "op": TokenType.OP,
    "constraint": TokenType.CONSTRAINT,
    "true": TokenType.TRUE,
    "false": TokenType.FALSE,
    "none": TokenType.NONE,
}

131
lexer/midas.py Normal file
View File

@@ -0,0 +1,131 @@
from lexer.base import Lexer
from lexer.keyword import MIDAS_KEYWORDS
from lexer.token import TokenType
class MidasLexer(Lexer):
    """Lexer producing tokens out of Midas source text"""

    # Characters that always produce the same token on their own
    _SINGLE_CHAR_TOKENS: dict[str, TokenType] = {
        "(": TokenType.LEFT_PAREN,
        ")": TokenType.RIGHT_PAREN,
        "[": TokenType.LEFT_BRACKET,
        "]": TokenType.RIGHT_BRACKET,
        "{": TokenType.LEFT_BRACE,
        "}": TokenType.RIGHT_BRACE,
        ":": TokenType.COLON,
        ",": TokenType.COMMA,
        "_": TokenType.UNDERSCORE,
        "+": TokenType.PLUS,
        "-": TokenType.MINUS,
        "*": TokenType.STAR,
        "\n": TokenType.NEWLINE,
    }

    # Characters producing one token when followed by "=" and another otherwise,
    # stored as (token with "=", token alone)
    _EQUAL_SUFFIXED_TOKENS: dict[str, tuple[TokenType, TokenType]] = {
        "<": (TokenType.LESS_EQUAL, TokenType.LESS),
        ">": (TokenType.GREATER_EQUAL, TokenType.GREATER),
        "=": (TokenType.EQUAL_EQUAL, TokenType.EQUAL),
    }

    def scan_token(self) -> None:
        """Consume the current character and add the token(s) it starts"""
        char: str = self.advance()

        single = self._SINGLE_CHAR_TOKENS.get(char)
        if single is not None:
            self.add_token(single)
        elif char in self._EQUAL_SUFFIXED_TOKENS:
            with_equal, alone = self._EQUAL_SUFFIXED_TOKENS[char]
            self.add_token(with_equal if self.match("=") else alone)
        elif char == "!":
            if self.match("="):
                self.add_token(TokenType.BANG_EQUAL)
            else:
                self.error("Unexpected single bang. Did you mean '!=' ?")
        elif char == "/":
            # A slash either starts a comment or is the division operator
            if self.match("/"):
                self.scan_comment()
            elif self.match("*"):
                self.scan_comment_multiline()
            else:
                self.add_token(TokenType.SLASH)
        elif char in (" ", "\r", "\t"):
            # Consume all whitespace characters until EOL or EOF
            while (
                not self.is_at_end()
                and self.peek() != "\n"
                and self.peek().isspace()
            ):
                self.advance()
            self.add_token(TokenType.WHITESPACE)
        elif char.isdigit():
            self.scan_number()
        elif char.isalpha():
            self.scan_identifier()
        else:
            self.error("Unexpected character")
        return None

    def _consume_digits(self) -> None:
        """Advance past a (possibly empty) run of digits"""
        while self.peek().isdigit():
            self.advance()

    def scan_number(self):
        """Scan the rest of number and add it as a token

        Integers and floats are both handled, and both are stored as a
        `float` value. Scientific notation and base prefixes (0x, 0b, 0o)
        are not supported
        """
        self._consume_digits()
        # A dot only belongs to the number when a digit follows it
        if self.peek() == "." and self.peek_next().isdigit():
            self.advance()
            self._consume_digits()
        text: str = self.source[self.start : self.idx]
        self.add_token(TokenType.NUMBER, float(text))

    def scan_identifier(self):
        """Scan the rest of an identifier and add it as a token

        An identifier starts with a letter, followed by any number of
        alphanumerical characters or underscores. Lexemes listed in
        `MIDAS_KEYWORDS` get the matching keyword token type instead
        """
        while self.peek() == "_" or self.peek().isalnum():
            self.advance()
        word: str = self.source[self.start : self.idx]
        self.add_token(MIDAS_KEYWORDS.get(word, TokenType.IDENTIFIER))

    def scan_comment(self):
        """Scan the rest of a comment and add it as a token

        A comment starts with `//` and ends at the EOL/EOF
        """
        while not (self.peek() == "\n" or self.is_at_end()):
            self.advance()
        self.add_token(TokenType.COMMENT)

    def scan_comment_multiline(self):
        """Scan the rest of a multiline comment and add it as a token

        A multiline comment starts with `/*` and ends with `*/` or at the EOF
        """
        while not self.is_at_end():
            if self.peek() == "*" and self.peek_next() == "/":
                break
            self.advance()
        # Consume the closing `*/`, as far as the source allows
        for _ in range(2):
            if not self.is_at_end():
                self.advance()
        self.add_token(TokenType.COMMENT)

13
lexer/position.py Normal file
View File

@@ -0,0 +1,13 @@
from dataclasses import dataclass
from typing import Optional
@dataclass(frozen=True)
class Position:
    """Immutable location (file, line, column) of a token in its source"""

    file: Optional[str]
    line: int
    column: int

    def __repr__(self):
        # A missing (or empty) file name contributes nothing to the text
        prefix = self.file if self.file else ""
        return "{}L{}:{}".format(prefix, self.line, self.column)

59
lexer/token.py Normal file
View File

@@ -0,0 +1,59 @@
from dataclasses import dataclass
from enum import Enum, auto
from typing import Any
from lexer.position import Position
class TokenType(Enum):
    """All the kinds of token the lexers can produce.

    Values are assigned with `auto()`, so they depend on declaration order.
    """

    # Punctuation
    LEFT_PAREN = auto()
    RIGHT_PAREN = auto()
    LEFT_BRACKET = auto()
    RIGHT_BRACKET = auto()
    LEFT_BRACE = auto()
    RIGHT_BRACE = auto()
    COLON = auto()
    COMMA = auto()
    UNDERSCORE = auto()

    # Operators
    PLUS = auto()
    MINUS = auto()
    STAR = auto()
    SLASH = auto()
    GREATER = auto()
    GREATER_EQUAL = auto()
    LESS = auto()
    LESS_EQUAL = auto()
    EQUAL = auto()
    EQUAL_EQUAL = auto()
    BANG_EQUAL = auto()

    # Literals
    IDENTIFIER = auto()
    NUMBER = auto()
    TRUE = auto()
    FALSE = auto()
    NONE = auto()

    # Keywords
    TYPE = auto()
    OP = auto()
    CONSTRAINT = auto()

    # Misc
    COMMENT = auto()
    WHITESPACE = auto()
    EOF = auto()
    NEWLINE = auto()
@dataclass(frozen=True)
class Token:
    """Immutable token produced by a lexer"""

    # Category of the token
    type: TokenType
    # Text of the token as it appears in the source
    lexeme: str
    # Value carried by the token (e.g. the number of a NUMBER token)
    value: Any
    # Where the token was found
    position: Position

152
parser/annotations.py Normal file
View File

@@ -0,0 +1,152 @@
from typing import Optional
from core.ast.annotations import (
AnnotationStmt,
ConstraintExpr,
Expr,
LiteralExpr,
SchemaElementExpr,
SchemaExpr,
Stmt,
TypeExpr,
WildcardExpr,
)
from lexer.token import Token, TokenType
from parser.base import Parser
from parser.errors import ParsingError
class AnnotationParser(Parser):
    """Recursive-descent parser for custom type annotations

    An annotation is a type name optionally followed by a bracketed schema,
    e.g. `Frame[name: str, age: int + (_ >= 0), _]`
    """

    # No token acts as a recovery point: after an error, `synchronize`
    # skips everything up to the end of the token list
    SYNC_BOUNDARY: set[TokenType] = set()

    def parse(self) -> Optional[Stmt]:
        """Parse the tokens as a single annotation

        Errors are collected in `self.errors` rather than raised. Tokens
        left over after a complete annotation are reported as an error,
        but the parsed annotation is still returned.

        Returns:
            Optional[Stmt]: the parsed annotation, or None if it is malformed
        """
        result: Optional[Stmt]
        try:
            result = self.annotation()
        except ParsingError:
            result = None
            self.synchronize()

        if not self.is_at_end():
            self.error(self.peek(), "Extra tokens")
        return result

    def synchronize(self):
        """Skip tokens until a synchronization boundary is found

        The offending token is always skipped; the following ones are
        skipped until one belonging to `SYNC_BOUNDARY` is next, or until
        the end of the token list
        """
        self.advance()
        while not self.is_at_end() and self.peek().type not in self.SYNC_BOUNDARY:
            self.advance()

    def annotation(self) -> AnnotationStmt:
        """Parse an annotation, written as `Type` or `Type[Schema]`

        Returns:
            AnnotationStmt: the parsed annotation statement
        """
        name: Token = self.consume(TokenType.IDENTIFIER, "Expected type identifier")
        schema: Optional[SchemaExpr] = (
            self.schema() if self.match(TokenType.LEFT_BRACKET) else None
        )
        return AnnotationStmt(name=name, schema=schema)

    def type_expr(self) -> TypeExpr:
        """Parse a type name followed by any number of `+ (constraint)` parts

        Returns:
            TypeExpr: the parsed type expression
        """
        name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
        constraints: list[ConstraintExpr] = []
        while True:
            if self.is_at_end() or not self.match(TokenType.PLUS):
                break
            self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint")
            constraints.append(self.constraint_expr())
            self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint")
        return TypeExpr(name=name, constraints=constraints)

    def constraint_expr(self) -> ConstraintExpr:
        """Parse a type constraint, written `value operator value`

        Returns:
            ConstraintExpr: the parsed type constraint expression
        """
        # Keyword arguments are evaluated left to right, which is also the
        # order in which the three parts appear in the source
        return ConstraintExpr(
            left=self.constraint_value(),
            op=self.constraint_operator(),
            right=self.constraint_value(),
        )

    def constraint_value(self) -> Expr:
        """Parse one side of a constraint: the wildcard `_` or a literal

        Returns:
            Expr: a WildcardExpr for `_`, a LiteralExpr otherwise
        """
        if not self.match(TokenType.UNDERSCORE):
            return self.literal()
        return WildcardExpr(self.previous())

    def literal(self) -> LiteralExpr:
        """Parse a literal value: a boolean, None or a number

        Raises:
            ParsingError: if the current token is not a literal

        Returns:
            LiteralExpr: the parsed literal expression
        """
        constants: dict[TokenType, Optional[bool]] = {
            TokenType.FALSE: False,
            TokenType.TRUE: True,
            TokenType.NONE: None,
        }
        for token_type, constant in constants.items():
            if self.match(token_type):
                return LiteralExpr(constant)
        if self.match(TokenType.NUMBER):
            return LiteralExpr(self.previous().value)
        raise self.error(self.peek(), "Expected literal")

    def constraint_operator(self) -> Token:
        """Parse a comparison operator

        Raises:
            ParsingError: if the current token is not a comparison operator

        Returns:
            Token: the operator token
        """
        operators: tuple[TokenType, ...] = (
            TokenType.LESS,
            TokenType.LESS_EQUAL,
            TokenType.GREATER,
            TokenType.GREATER_EQUAL,
            TokenType.EQUAL_EQUAL,
            TokenType.BANG_EQUAL,
        )
        if not self.match(*operators):
            raise self.error(self.peek(), "Expected constraint operator")
        return self.previous()

    def schema(self) -> SchemaExpr:
        """Parse a schema definition: a comma separated list of schema elements

        The opening bracket must already have been consumed by the caller.

        Returns:
            SchemaExpr: the parsed schema expression
        """
        opening: Token = self.previous()
        elements: list[Expr] = []
        while not (self.check(TokenType.RIGHT_BRACKET) or self.is_at_end()):
            elements.append(self.schema_element())
            # A separator is only required when the schema is not closed
            # right after the element
            if not self.check(TokenType.RIGHT_BRACKET):
                self.consume(TokenType.COMMA, "Expected ',' between schema elements")
        closing: Token = self.consume(TokenType.RIGHT_BRACKET, "Unclosed schema")
        return SchemaExpr(left=opening, elements=elements, right=closing)

    def schema_element(self) -> SchemaElementExpr:
        """Parse a schema element

        One of: an anonymous element (`_`), a type, an untyped named
        column (`name: _`) or a named column (`name: Type`)

        Raises:
            ParsingError: if the current token cannot start a schema element

        Returns:
            SchemaElementExpr: the parsed schema element expression
        """
        if self.match(TokenType.UNDERSCORE):
            return SchemaElementExpr(name=None, type=None)
        if not self.check(TokenType.IDENTIFIER):
            raise self.error(self.peek(), "Expected schema element")

        column_name: Optional[Token] = None
        column_type: Optional[TypeExpr] = None
        if self.check_next(TokenType.COLON):
            column_name = self.advance()
            self.advance()  # skip the ':'
        if not self.match(TokenType.UNDERSCORE):
            column_type = self.type_expr()
        return SchemaElementExpr(name=column_name, type=column_type)

183
parser/base.py Normal file
View File

@@ -0,0 +1,183 @@
from abc import ABC, abstractmethod
from dataclasses import dataclass
from typing import Generic, TypeVar
from lexer.token import Token, TokenType
from parser.errors import ParsingError
@dataclass(frozen=True)
class TokenError:
    """A parsing error attached to the token at which it was detected"""

    token: Token
    message: str

    def get_report(self) -> str:
        """Build the full, human readable error message

        Returns:
            str: the token position, the offending lexeme (or `end` for
                the EOF token) and the error message
        """
        at_eof: bool = self.token.type == TokenType.EOF
        location: str = "end" if at_eof else f"'{self.token.lexeme}'"
        return f"({self.token.position}) Error at {location}: {self.message}"
T = TypeVar("T")
class Parser(ABC, Generic[T]):
    """Abstract base providing the building blocks of a recursive-descent parser

    This implementation is based on the [_Crafting Interpreters_][1] book by Robert Nystrom,
    more specifically on my [previous Python implementation](https://git.kb28.ch/HEL/pebble)

    [1]: https://craftinginterpreters.com/
    """

    # Token types dropped from the input before parsing starts
    IGNORE: set[TokenType] = {
        TokenType.WHITESPACE,
        TokenType.COMMENT,
        TokenType.NEWLINE,
    }

    def __init__(self, tokens: list[Token]) -> None:
        """Create a new parser for the given tokens

        Tokens whose type is listed in `IGNORE` are discarded.

        Args:
            tokens (list[Token]): the tokens to parse
        """
        self.tokens: list[Token] = [
            token for token in tokens if token.type not in self.IGNORE
        ]
        self.current: int = 0
        self.length: int = len(self.tokens)
        self.errors: list[TokenError] = []

    def error(self, token: Token, message: str) -> ParsingError:
        """Record an error and build the exception for the caller to raise

        Args:
            token (Token): the token at which the error was detected
            message (str): a message explaining the error

        Returns:
            ParsingError: the parsing error to raise
        """
        report = TokenError(token=token, message=message)
        self.errors.append(report)
        return ParsingError()

    @abstractmethod
    def parse(self) -> T:
        """Parse the tokens

        Returns:
            T: the parsed element(s)
        """
        pass

    def is_at_end(self) -> bool:
        """Whether the parser has reached the EOF token

        Returns:
            bool: True if the current token is the EOF token
        """
        return self.peek().type == TokenType.EOF

    def peek(self) -> Token:
        """Get the current token without consuming it

        Returns:
            Token: the current token
        """
        return self.tokens[self.current]

    def previous(self) -> Token:
        """Get the token right before the current one

        No bounds check is performed: the token is read at index
        `current - 1`.

        Returns:
            Token: the previous token
        """
        return self.tokens[self.current - 1]

    def check(self, token_type: TokenType) -> bool:
        """Check whether the current token is of the given type

        Always False when the parser is at the EOF token.

        Args:
            token_type (TokenType): the type of token to check

        Returns:
            bool: True if the current token is of the given type and not EOF
        """
        return not self.is_at_end() and self.peek().type == token_type

    def check_next(self, token_type: TokenType) -> bool:
        """Check whether the token after the current one is of the given type

        Always False when the parser is at the EOF token, when there is no
        following token, or when the following token is the EOF token.

        Args:
            token_type (TokenType): the type of token to check

        Returns:
            bool: True if the next token is of the given type and not EOF
        """
        if self.is_at_end():
            return False
        next_index: int = self.current + 1
        if next_index >= self.length:
            return False
        upcoming: Token = self.tokens[next_index]
        return upcoming.type != TokenType.EOF and upcoming.type == token_type

    def advance(self) -> Token:
        """Consume the current token, unless the parser is at the EOF token

        Returns:
            Token: the token before the (possibly unchanged) current position
        """
        if not self.is_at_end():
            self.current += 1
        return self.previous()

    def match(self, *types: TokenType) -> bool:
        """Consume the current token if it is of one of the given types

        Returns:
            bool: whether a token was matched and consumed
        """
        if any(self.check(candidate) for candidate in types):
            self.advance()
            return True
        return False

    def consume(self, token_type: TokenType, error_msg: str) -> Token:
        """Consume the current token if it is of the given type, fail otherwise

        Args:
            token_type (TokenType): the expected token type
            error_msg (str): the error message if the token doesn't match

        Raises:
            ParsingError: if the current token doesn't match the given type;
                the error is also recorded in `self.errors`

        Returns:
            Token: the consumed token
        """
        if not self.check(token_type):
            raise self.error(self.peek(), error_msg)
        return self.advance()

2
parser/errors.py Normal file
View File

@@ -0,0 +1,2 @@
class ParsingError(RuntimeError):
    """Raised by the parsers when the tokens do not form a valid construct"""

217
parser/midas.py Normal file
View File

@@ -0,0 +1,217 @@
from typing import Optional
from core.ast.midas import (
ConstraintExpr,
ConstraintStmt,
Expr,
LiteralExpr,
OpStmt,
PropertyStmt,
Stmt,
TypeBodyExpr,
TypeExpr,
TypeStmt,
WildcardExpr,
)
from lexer.token import Token, TokenType
from parser.base import Parser
from parser.errors import ParsingError
class MidasParser(Parser):
    """Recursive-descent parser for Midas type definitions

    A Midas source is a sequence of `type`, `op` and `constraint`
    declarations.
    """

    # Keywords starting a declaration; error recovery stops in front of them
    SYNC_BOUNDARY: set[TokenType] = {TokenType.TYPE, TokenType.OP, TokenType.CONSTRAINT}

    def parse(self) -> list[Stmt]:
        """Parse declarations until the end of the tokens or the first error

        Parsing stops at the first declaration that fails to parse; the
        failure is reported through `self.errors`, not raised.

        Returns:
            list[Stmt]: the declarations parsed before any error
        """
        statements: list[Stmt] = []
        while not self.is_at_end():
            stmt: Optional[Stmt] = self.declaration()
            if stmt is None:
                # The error has already been recorded in `self.errors`
                break
            statements.append(stmt)
        return statements

    def synchronize(self):
        """Skip tokens until a synchronization boundary is found

        The offending token is always skipped; the following ones are
        skipped until a declaration keyword is next, or until the end of
        the token list. NEWLINE tokens cannot serve as a boundary because
        the base parser removes them from the token list.
        """
        self.advance()
        while not self.is_at_end() and self.peek().type not in self.SYNC_BOUNDARY:
            self.advance()

    def declaration(self) -> Optional[Stmt]:
        """Try and parse a declaration

        Any parsing error is caught, the parser is synchronized and None
        is returned.

        Returns:
            Optional[Stmt]: the parsed Midas statement, or None if a ParsingError was raised
        """
        try:
            if self.match(TokenType.TYPE):
                return self.type_declaration()
            if self.match(TokenType.OP):
                return self.op_declaration()
            if self.match(TokenType.CONSTRAINT):
                return self.constraint_declaration()
            raise self.error(self.peek(), "Unexpected token")
        except ParsingError:
            self.synchronize()
            return None

    def type_declaration(self) -> TypeStmt:
        """Parse a type declaration

        A type declaration is written `type Name<TypeExpr, ...>` optionally
        followed by a brace-wrapped body. The `type` keyword must already
        have been consumed.

        Returns:
            TypeStmt: the parsed type declaration statement
        """
        name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
        self.consume(TokenType.LESS, "Expected '<' after type name")
        bases: list[TypeExpr] = []
        while not self.check(TokenType.GREATER) and not self.is_at_end():
            bases.append(self.type_expr())
            # A separator is only required when the list is not closed
            # right after the base
            if not self.check(TokenType.GREATER):
                self.consume(TokenType.COMMA, "Expected ',' between type bases")
        self.consume(TokenType.GREATER, "Expected '>' after base type")
        body: Optional[TypeBodyExpr] = None
        if self.check(TokenType.LEFT_BRACE):
            body = self.type_body_expr()
        return TypeStmt(name=name, bases=bases, body=body)

    def type_expr(self) -> TypeExpr:
        """Parse a type name followed by any number of `+ (constraint)` parts

        Returns:
            TypeExpr: the parsed type expression
        """
        name: Token = self.consume(TokenType.IDENTIFIER, "Expected type name")
        constraints: list[ConstraintExpr] = []
        while not self.is_at_end() and self.match(TokenType.PLUS):
            self.consume(TokenType.LEFT_PAREN, "Expected '(' before type constraint")
            constraints.append(self.constraint_expr())
            self.consume(TokenType.RIGHT_PAREN, "Expected ')' after type constraint")
        return TypeExpr(name=name, constraints=constraints)

    def constraint_expr(self) -> ConstraintExpr:
        """Parse a type constraint, written `value operator value`

        Returns:
            ConstraintExpr: the parsed type constraint expression
        """
        left: Expr = self.constraint_value()
        op: Token = self.constraint_operator()
        right: Expr = self.constraint_value()
        return ConstraintExpr(left=left, op=op, right=right)

    def constraint_value(self) -> Expr:
        """Parse one side of a constraint: the wildcard `_` or a literal

        Returns:
            Expr: a WildcardExpr for `_`, a LiteralExpr otherwise
        """
        if self.match(TokenType.UNDERSCORE):
            return WildcardExpr(self.previous())
        return self.literal()

    def literal(self) -> LiteralExpr:
        """Parse a literal value: a boolean, None or a number

        Raises:
            ParsingError: if the current token is not a literal

        Returns:
            LiteralExpr: the parsed literal expression
        """
        if self.match(TokenType.FALSE):
            return LiteralExpr(False)
        if self.match(TokenType.TRUE):
            return LiteralExpr(True)
        if self.match(TokenType.NONE):
            return LiteralExpr(None)
        if self.match(TokenType.NUMBER):
            return LiteralExpr(self.previous().value)
        raise self.error(self.peek(), "Expected literal")

    def constraint_operator(self) -> Token:
        """Parse a comparison operator

        Raises:
            ParsingError: if the current token is not a comparison operator

        Returns:
            Token: the operator token
        """
        if self.match(
            TokenType.LESS,
            TokenType.LESS_EQUAL,
            TokenType.GREATER,
            TokenType.GREATER_EQUAL,
            TokenType.EQUAL_EQUAL,
            TokenType.BANG_EQUAL,
        ):
            return self.previous()
        raise self.error(self.peek(), "Expected constraint operator")

    def type_body_expr(self) -> TypeBodyExpr:
        """Parse a type definition body

        A type definition body is a set of whitespace-separated
        property statements enclosed in curly braces

        Returns:
            TypeBodyExpr: the parsed type body expression
        """
        self.consume(TokenType.LEFT_BRACE, "Expected '{' to start type body")
        properties: list[PropertyStmt] = []
        while not self.check(TokenType.RIGHT_BRACE) and not self.is_at_end():
            properties.append(self.property_stmt())
        self.consume(TokenType.RIGHT_BRACE, "Unclosed type body")
        return TypeBodyExpr(properties=properties)

    def property_stmt(self) -> PropertyStmt:
        """Parse a property statement, written `name: Type`

        Returns:
            PropertyStmt: the parsed property statement
        """
        name: Token = self.consume(TokenType.IDENTIFIER, "Expected property name")
        self.consume(TokenType.COLON, "Expected ':' after property name")
        # Named `property_type` to avoid shadowing the `type` builtin
        property_type: TypeExpr = self.type_expr()
        return PropertyStmt(name=name, type=property_type)

    def op_declaration(self) -> OpStmt:
        """Parse an operation definition

        An operation is written `op <Type1> operator <Type2> = <Type3>`
        where `operator` can be any single token. The `op` keyword must
        already have been consumed.

        Returns:
            OpStmt: the parsed operation statement
        """
        self.consume(TokenType.LESS, "Expected '<' before first type")
        left: TypeExpr = self.type_expr()
        self.consume(TokenType.GREATER, "Expected '>' after first type")
        # Any token is accepted as the operator; at the end of the input
        # nothing is consumed and the next `consume` reports the error
        op: Token = self.advance()
        self.consume(TokenType.LESS, "Expected '<' before second type")
        right: TypeExpr = self.type_expr()
        self.consume(TokenType.GREATER, "Expected '>' after second type")
        self.consume(TokenType.EQUAL, "Expected '=' after second type")
        self.consume(TokenType.LESS, "Expected '<' before result type")
        result: TypeExpr = self.type_expr()
        self.consume(TokenType.GREATER, "Expected '>' after result type")
        return OpStmt(left=left, op=op, right=right, result=result)

    def constraint_declaration(self) -> ConstraintStmt:
        """Parse a type constraint declaration

        A constraint is written `constraint Name = constraint_expression`.
        The `constraint` keyword must already have been consumed.

        Returns:
            ConstraintStmt: the parsed constraint declaration statement
        """
        name: Token = self.consume(TokenType.IDENTIFIER, "Expected constraint name")
        self.consume(TokenType.EQUAL, "Expected '=' after constraint name")
        constraint: ConstraintExpr = self.constraint_expr()
        return ConstraintStmt(name=name, constraint=constraint)

20
syntax/annotations.ebnf Normal file
View File

@@ -0,0 +1,20 @@
identifier ::= '[a-zA-Z][a-zA-Z0-9_]*'
integer ::= '\d+'
number ::= integer ["." integer]
boolean ::= "False" | "True"
none ::= "None"
value ::= number | boolean | none
lambda-value ::= "_" | value
lambda-operator ::= ">" | "<" | ">=" | "<=" | "==" | "!="
lambda ::= lambda-value lambda-operator lambda-value
constraint ::= identifier | "(" lambda ")"
base-type ::= identifier
type ::= base-type { "+" constraint }
column-type ::= type | "_"
column-def ::= [ identifier ":" ] column-type
frame-def ::= column-def { "," column-def }

74
syntax/annotations.typ Normal file
View File

@@ -0,0 +1,74 @@
// Railroad diagrams for the type annotation syntax.
// Each rule below is a raw block holding a diagram description which is
// passed to fervojo's `render`.
#import "@preview/fervojo:0.1.1": render
#let value = ```
{[`value` <
[`number` 'digit' * ! <!, ["." 'digit' * !]>],
[`boolean` <"False", "True">],
[`none` "None"]
>]}
```
#let constraint = ```
{[`constraint` <"_", 'value'> <">", "<", ">=", "<=", "==", "!="> <"_", 'value'>]}
```
#let type-with-constraints = ```
{[`type-with-constraints` 'identifier' <!, ["+" "(" 'constraint' ")"] * !>]}
```
#let column-def = ```
{[`column-def` <!, ['identifier' ":"]> <"_", 'type-with-constraints'>]}
```
#let frame-def = ```
{[`frame-def` 'column-def' * ","]}
```
#let annotation = ```
{[`annotation` 'identifier' <!, ["[" 'frame-def' "]"]>]}
```
// Rules in the order in which they are rendered
#let rules = (
value,
constraint,
type-with-constraints,
column-def,
frame-def,
annotation,
)
#set text(font: "Source Sans 3")
= Type annotation syntax
// One diagram per rule
#for rule in rules {
render(rule)
}
// Disabled: builds a single combined diagram by recursively replacing each
// quoted rule name with the text of the rule it refers to
/*
#let by-name = (
annotation: annotation,
frame-def: frame-def,
column-def: column-def,
type-with-constraints: type-with-constraints,
constraint: constraint,
value: value,
)
#let substitute(base-rule) = {
let new-rule = base-rule
for (key, rule) in by-name.pairs() {
new-rule = new-rule.replace("'" + key + "'", rule.text.slice(1, -1))
}
if new-rule != base-rule {
new-rule = substitute(new-rule)
}
return new-rule
}
#let combined = raw(substitute(annotation.text))
#set page(flipped: true)
#render(combined)
*/

26
syntax/midas.ebnf Normal file
View File

@@ -0,0 +1,26 @@
identifier ::= '[a-zA-Z][a-zA-Z0-9_]*'
integer ::= '\d+'
number ::= integer ["." integer]
boolean ::= "False" | "True"
none ::= "none"
value ::= number | boolean | none
lambda-value ::= "_" | value
lambda-operator ::= ">" | "<" | ">=" | "<=" | "==" | "!="
lambda ::= lambda-value lambda-operator lambda-value
constraint ::= identifier | "(" lambda ")"
base-type ::= identifier
type ::= base-type { "+" constraint }
type-property ::= 'identifier' ":" 'type'
type-body ::= "{" { 'type-property' } "}"
operation-type ::= "<" 'type' ">"
type-statement ::= "type" 'identifier' "<" 'type' {"," 'type'} ">" ['type-body']
operation-statement ::= "op" 'operation-type' 'operator' 'operation-type' "=" 'operation-type'
constraint-statement ::= "constraint" 'identifier' "=" 'lambda'
statement ::= type-statement | operation-statement | constraint-statement

97
syntax/midas.typ Normal file
View File

@@ -0,0 +1,97 @@
// Railroad diagrams for the Midas type definition syntax.
// Each rule below is a raw block holding a diagram description which is
// passed to fervojo's `render`.
#import "@preview/fervojo:0.1.1": render
#let value = ```
{[`value` <
[`number` 'digit' * ! <!, ["." 'digit' * !]>],
[`boolean` <"False", "True">],
[`none` "None"]
>]}
```
#let constraint = ```
{[`constraint` <"_", 'value'> <">", "<", ">=", "<=", "==", "!="> <"_", 'value'>]}
```
#let type-with-constraints = ```
{[`type-with-constraints` 'identifier' <!, ["+" "(" 'constraint' ")"] * !>]}
```
#let type-property = ```
{[`type-property` 'identifier' ":" 'type-with-constraints']}
```
#let type-body = ```
{[`type-body` "{" <!, 'type-property'*!> "}"]}
```
#let operation-type = ```
{[`operation-type` "<" 'type-with-constraints' ">"]}
```
#let type-statement = ```
{[`type-statement` "type" 'identifier' "<" 'type-with-constraints'*"," ">" <!, 'type-body'>]}
```
#let operation-statement = ```
{[`operation-statement` "op" 'operation-type' "operator" 'operation-type' "=" 'operation-type']}
```
#let constraint-statement = ```
{[`constraint-statement` "constraint" 'identifier' "=" 'constraint']}
```
#let statement = ```
{[`statement` <'type-statement', 'operation-statement', 'constraint-statement'>]}
```
// Rules in the order in which they are rendered
#let rules = (
value,
constraint,
type-with-constraints,
type-property,
type-body,
operation-type,
type-statement,
operation-statement,
constraint-statement,
statement,
)
#set text(font: "Source Sans 3")
= Midas type definition syntax
// One diagram per rule
#for rule in rules {
render(rule)
}
// Disabled: builds a single combined diagram by recursively replacing each
// quoted rule name with the text of the rule it refers to, then removing
// the backtick-quoted rule labels
/*
#let by-name = (
value: value,
constraint: constraint,
type-with-constraints: type-with-constraints,
type-property: type-property,
type-body: type-body,
operation-type: operation-type,
type-statement: type-statement,
operation-statement: operation-statement,
constraint-statement: constraint-statement,
)
#let substitute(base-rule) = {
let new-rule = base-rule
for (key, rule) in by-name.pairs() {
new-rule = new-rule.replace("'" + key + "'", rule.text.slice(1, -1))
}
if new-rule != base-rule {
new-rule = substitute(new-rule)
}
return new-rule.replace(regex("`.*?`"), "")
}
#let combined = raw(substitute(statement.text))
#set page(flipped: true)
#render(combined)
*/

52
test.py Normal file
View File

@@ -0,0 +1,52 @@
import importlib
from pathlib import Path
from core.ast.printer import AnnotationAstPrinter, MidasAstPrinter
from lexer.annotations import AnnotationLexer
from lexer.midas import MidasLexer
from lexer.token import Token
from parser.annotations import AnnotationParser
from parser.midas import MidasParser
def test_annotation():
    """Lex, parse and pretty-print the `df` annotation of the example module"""
    # Frame annotation
    module = importlib.import_module("examples.00_syntax_prototype.01_simple_types")
    source: str = module.__annotations__["df"]

    tokens: list[Token] = AnnotationLexer(source, "01_simple_types.py").process()
    parser = AnnotationParser(tokens)
    parsed = parser.parse()
    print(parsed)

    for error in parser.errors:
        print(error.get_report())

    printer = AnnotationAstPrinter()
    if parsed is None:
        return
    print(printer.print(parsed))
def test_midas():
    """Lex, parse and pretty-print the example Midas type definitions"""
    # Midas type definitions
    path: Path = Path("examples") / "00_syntax_prototype" / "02_custom_types.midas"
    # Explicit encoding: the default depends on the platform locale
    definitions: str = path.read_text(encoding="utf-8")
    midas_lexer: MidasLexer = MidasLexer(definitions, path.name)
    tokens: list[Token] = midas_lexer.process()
    parser = MidasParser(tokens)
    parsed = parser.parse()
    print(parsed)
    for err in parser.errors:
        print(err.get_report())
    printer = MidasAstPrinter()
    # `MidasParser.parse` only returns successfully parsed statements,
    # so there is no None entry to handle here
    for stmt in parsed:
        print(printer.print(stmt))
# Only run the demo when executed as a script, not when imported
if __name__ == "__main__":
    test_midas()

View File

@@ -0,0 +1,129 @@
from typing import Any
import pytest
from lexer.annotations import AnnotationLexer
from lexer.token import Token, TokenType
def scan(source: str) -> list[Token]:
    """Tokenize `source` with the annotation lexer"""
    lexer = AnnotationLexer(source)
    return lexer.process()
def assert_n_tokens(tokens: list[Token], n: int):
    """Assert that `tokens` holds exactly `n` tokens followed by the EOF token"""
    expected_length = n + 1  # the trailing EOF token is not counted in `n`
    assert len(tokens) == expected_length
    last = tokens[-1]
    assert last.type == TokenType.EOF
@pytest.mark.parametrize(
    "src,expected",
    [
        ("(", TokenType.LEFT_PAREN),
        (")", TokenType.RIGHT_PAREN),
        ("[", TokenType.LEFT_BRACKET),
        ("]", TokenType.RIGHT_BRACKET),
        (":", TokenType.COLON),
        (",", TokenType.COMMA),
        ("_", TokenType.UNDERSCORE),
    ],
)
def test_punctuation(src: str, expected: TokenType):
    """Each punctuation character is scanned as a single token of its own type"""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first: Token = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    "src,expected",
    [
        ("+", TokenType.PLUS),
        (">", TokenType.GREATER),
        (">=", TokenType.GREATER_EQUAL),
        ("<", TokenType.LESS),
        ("<=", TokenType.LESS_EQUAL),
        ("=", TokenType.EQUAL),
        ("==", TokenType.EQUAL_EQUAL),
        ("!=", TokenType.BANG_EQUAL),
    ],
)
def test_operators(src: str, expected: TokenType):
    """Each operator, one or two characters long, is scanned as a single token"""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first: Token = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    "src,expected",
    [
        ("a", TokenType.IDENTIFIER),
        ("foo", TokenType.IDENTIFIER),
        ("foo1", TokenType.IDENTIFIER),
        ("foo_", TokenType.IDENTIFIER),
        ("foo_bar1_baz2", TokenType.IDENTIFIER),
        ("FOO_BAR1_BAZ2", TokenType.IDENTIFIER),
        ("True", TokenType.TRUE),
        ("False", TokenType.FALSE),
        ("None", TokenType.NONE),
    ],
)
def test_identifiers_keywords(src: str, expected: TokenType):
    """Words are scanned as identifiers, except the literal keywords"""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first: Token = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    "src,expected",
    [
        ("#", TokenType.COMMENT),
        ("# This is a comment", TokenType.COMMENT),
        (" ", TokenType.WHITESPACE),
        ("\t", TokenType.WHITESPACE),
        ("\r", TokenType.WHITESPACE),
        (" \t \t", TokenType.WHITESPACE),
        ("\n", TokenType.NEWLINE),
    ],
)
def test_misc(src: str, expected: TokenType):
    """Comments, whitespace runs and newlines each yield a single token"""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    first: Token = scanned[0]
    assert first.type == expected
@pytest.mark.parametrize(
    "src,expected_type,expected_value",
    [
        ("0", TokenType.NUMBER, 0),
        ("0.0", TokenType.NUMBER, 0),
        ("1234.56", TokenType.NUMBER, 1234.56),
    ],
)
def test_literals(src: str, expected_type: TokenType, expected_value: Any):
    """Number literals are scanned as one token carrying their numeric value"""
    scanned: list[Token] = scan(src)
    assert_n_tokens(scanned, 1)
    literal: Token = scanned[0]
    assert literal.type == expected_type
    assert literal.value == expected_value
def test_single_bang_error():
    """A `!` which is not followed by `=` is rejected by the lexer"""
    source = "!"
    with pytest.raises(SyntaxError):
        scan(source)
@pytest.mark.parametrize(
    "src",
    [
        "-",
        "*",
        "/",
        "{",
        "}",
        "@",
        '"',
        "'",
        ".",
    ],
)
def test_unexpected_character(src: str):
    """Characters outside the annotation syntax are rejected by the lexer"""
    with pytest.raises(SyntaxError):
        AnnotationLexer(src).process()

View File

@@ -0,0 +1,130 @@
from typing import Optional
import pytest
from core.ast.annotations import (
AnnotationStmt,
ConstraintExpr,
Expr,
LiteralExpr,
SchemaElementExpr,
SchemaExpr,
Stmt,
TypeExpr,
WildcardExpr,
)
from lexer.annotations import AnnotationLexer
from lexer.position import Position
from lexer.token import Token
from parser.annotations import AnnotationParser
class AstSerializer(Stmt.Visitor[str], Expr.Visitor[str]):
    """Visitor rendering an annotation AST as a compact S-expression string"""

    def serialize(self, stmt: Stmt):
        """Serialize a whole statement"""
        return stmt.accept(self)

    def visit_annotation_stmt(self, stmt: AnnotationStmt) -> str:
        if stmt.schema is None:
            return f"(annotation {stmt.name.lexeme})"
        return f"(annotation {stmt.name.lexeme} {stmt.schema.accept(self)})"

    def visit_schema_expr(self, expr: SchemaExpr) -> str:
        # An empty schema is rendered as "(schema )", trailing space included
        body: str = " ".join(element.accept(self) for element in expr.elements)
        return "(schema " + body + ")"

    def visit_schema_element_expr(self, expr: SchemaElementExpr) -> str:
        # Missing names and missing types are both rendered as "_"
        name: str = "_" if expr.name is None else expr.name.lexeme
        type_str: str = "_" if expr.type is None else expr.type.accept(self)
        return f"({name} {type_str})"

    def visit_type_expr(self, expr: TypeExpr) -> str:
        parts: list[str] = [expr.name.lexeme]
        parts.extend(constraint.accept(self) for constraint in expr.constraints)
        return "(" + " ".join(parts) + ")"

    def visit_constraint_expr(self, expr: ConstraintExpr) -> str:
        left: str = expr.left.accept(self)
        right: str = expr.right.accept(self)
        return f"(constraint {left} {expr.op.lexeme} {right})"

    def visit_wildcard_expr(self, expr: WildcardExpr) -> str:
        return "(_)"

    def visit_literal_expr(self, expr: LiteralExpr) -> str:
        return "(" + f"{expr.value}" + ")"
def parse(source: str) -> Optional[Stmt]:
    """Lex and parse `source` as an annotation"""
    lexer = AnnotationLexer(source)
    parser = AnnotationParser(lexer.process())
    return parser.parse()
def must_parse(source: str) -> Stmt:
    """Parse `source`, failing the test if no statement is produced"""
    parsed: Optional[Stmt] = parse(source)
    assert parsed is not None
    return parsed
def ast_str(source: str) -> str:
    """Parse `source` and return the serialized form of its AST"""
    serializer = AstSerializer()
    return serializer.serialize(must_parse(source))
@pytest.mark.parametrize(
    "src,expected",
    [
        ("Type", "(annotation Type)"),
        ("Type[]", "(annotation Type (schema ))"),
        (
            """
Frame[
verified: bool,
birth_year: int,
height: float + ( _ > 0 ) + ( _ < 250 ),
name: str,
date: datetime,
float, # unnamed
unknown: _, # untyped
_ # unnamed and untyped
]
""",
            "(annotation Frame (schema (verified (bool)) (birth_year (int)) (height (float (constraint (_) > (0.0)) (constraint (_) < (250.0)))) (name (str)) (date (datetime)) (_ (float)) (unknown _) (_ _)))",
        ),
    ],
)
def test_expressions(src: str, expected: str):
    """Valid annotations are parsed into the expected AST"""
    serialized: str = ast_str(src)
    assert serialized == expected
@pytest.mark.parametrize(
    "src,pos,should_fail",
    [
        ("", (1, 1), True),
        ("42", (1, 1), True),
        ("True", (1, 1), True),
        ("Type[", (1, 6), True),
        ("Type[] Type2", (1, 8), False),
        ("Type[bool:]", (1, 11), True),
        ("Type[3]", (1, 6), True),
        ("Type[bool float]", (1, 11), True),
        ("Type[bool (_ < 2)]", (1, 11), True),
        ("Type[bool + _ < 2)]", (1, 13), True),
        ("Type[bool + (_ < 2]", (1, 19), True),
        ("Type[bool + (< 2)]", (1, 14), True),
        ("Type[bool + (_ + 2)]", (1, 16), True),
        ("Type[bool + (Foo + Bar)]", (1, 14), True),
        # ("Type[bool,]", (1, 11), True), # trailing comma is accepted, TODO: update parser or EBNF
        ("Type[bool, Type[]]", (1, 16), True),
        ("Type[foo: 3]", (1, 11), True),
    ],
)
def test_parsing_error(src: str, pos: tuple[int, int], should_fail: bool):
    """Invalid annotations report their first error at the expected position

    `should_fail` tells whether the parser must also return no statement.
    """
    parser: AnnotationParser = AnnotationParser(AnnotationLexer(src).process())
    result: Optional[Stmt] = parser.parse()
    if should_fail:
        assert result is None
    assert len(parser.errors) != 0
    first_error_at: Position = parser.errors[0].token.position
    assert (first_error_at.line, first_error_at.column) == pos

View File

@@ -0,0 +1,202 @@
import textwrap
import pytest
from core.ast.midas import (
ConstraintExpr,
ConstraintStmt,
Expr,
LiteralExpr,
OpStmt,
PropertyStmt,
Stmt,
TypeBodyExpr,
TypeExpr,
TypeStmt,
WildcardExpr,
)
from lexer.midas import MidasLexer
from lexer.position import Position
from lexer.token import Token
from parser.midas import MidasParser
class AstSerializer(Stmt.Visitor[str], Expr.Visitor[str]):
    """Visitor rendering a Midas AST as a compact S-expression string"""

    def serialize(self, stmt: Stmt):
        """Serialize a whole statement"""
        return stmt.accept(self)

    def visit_type_stmt(self, stmt: TypeStmt) -> str:
        parts: list[str] = [f"(type_def {stmt.name.lexeme}"]
        parts.extend(base.accept(self) for base in stmt.bases)
        if stmt.body is not None:
            parts.append(stmt.body.accept(self))
        return " ".join(parts) + ")"

    def visit_type_expr(self, expr: TypeExpr) -> str:
        parts: list[str] = [expr.name.lexeme]
        parts.extend(constraint.accept(self) for constraint in expr.constraints)
        return "(" + " ".join(parts) + ")"

    def visit_constraint_expr(self, expr: ConstraintExpr) -> str:
        left: str = expr.left.accept(self)
        right: str = expr.right.accept(self)
        return f"(constraint {left} {expr.op.lexeme} {right})"

    def visit_wildcard_expr(self, expr: WildcardExpr) -> str:
        return "(_)"

    def visit_literal_expr(self, expr: LiteralExpr) -> str:
        return "(" + f"{expr.value}" + ")"

    def visit_type_body_expr(self, expr: TypeBodyExpr) -> str:
        parts: list[str] = ["(body"]
        parts.extend(prop.accept(self) for prop in expr.properties)
        return " ".join(parts) + ")"

    def visit_property_stmt(self, stmt: PropertyStmt) -> str:
        type_str: str = stmt.type.accept(self)
        return f"(property {stmt.name.lexeme} {type_str})"

    def visit_op_stmt(self, stmt: OpStmt) -> str:
        operands: list[str] = [
            stmt.left.accept(self),
            stmt.op.lexeme,
            stmt.right.accept(self),
            stmt.result.accept(self),
        ]
        return "(op_def " + " ".join(operands) + ")"

    def visit_constraint_stmt(self, stmt: ConstraintStmt) -> str:
        constraint: str = stmt.constraint.accept(self)
        return f"(constraint_def {stmt.name.lexeme} {constraint})"
def parse(source: str) -> list[Stmt]:
    """Lex and parse `source` as Midas type definitions"""
    lexer = MidasLexer(source)
    parser = MidasParser(lexer.process())
    return parser.parse()
def ast_str(source: str) -> list[str]:
    """Parse `source` and return the serialized form of each statement"""
    serialized: list[str] = []
    for stmt in parse(source):
        # A fresh serializer is used for every statement
        serialized.append(AstSerializer().serialize(stmt))
    return serialized
@pytest.mark.parametrize(
    "src,expected",
    [
        ("type Foo<>", "(type_def Foo)"),
        ("type Foo<Bar>", "(type_def Foo (Bar))"),
        ("type Foo<Bar, Baz>", "(type_def Foo (Bar) (Baz))"),
        (
            "type Foo<Bar + (_ < 2), Baz>",
            "(type_def Foo (Bar (constraint (_) < (2.0))) (Baz))",
        ),
        (
            """
type Foo<> {
foo: Bar
}
""",
            "(type_def Foo (body (property foo (Bar))))",
        ),
        (
            """
type Foo<> {
foo: Bar + (_ != none)
foo2: Bar2 + (0 <= _) + (_ <= 100)
}
""",
            "(type_def Foo (body (property foo (Bar (constraint (_) != (None)))) (property foo2 (Bar2 (constraint (0.0) <= (_)) (constraint (_) <= (100.0))))))",
        ),
        ("op <A> + <B> = <C>", "(op_def (A) + (B) (C))"),
        (
            "op <A + (_ < 100)> + <B + (_ < 100)> = <C + (_ < 200)>",
            "(op_def (A (constraint (_) < (100.0))) + (B (constraint (_) < (100.0))) (C (constraint (_) < (200.0))))",
        ),
        (
            "constraint Positive = _ >= 0",
            "(constraint_def Positive (constraint (_) >= (0.0)))",
        ),
    ],
)
def test_expressions(src: str, expected: str | list[str]):
    """Valid definitions are parsed into the expected AST(s)

    A single expected string stands for a one-statement result.
    """
    expected_list: list[str] = [expected] if isinstance(expected, str) else expected
    assert ast_str(src) == expected_list
@pytest.mark.parametrize(
"src,pos",
[
###
# Misc
###
("42", (1, 1)),
("true", (1, 1)),
("foo", (1, 1)),
###
# Type statements
###
("type", (1, 5)),
("type true", (1, 6)),
("type Foo", (1, 9)),
("type Foo<1>", (1, 10)),
# ("type Foo<float,>", (1, 16)), # trailing comma is accepted, TODO: update parser or EBNF
("type Foo<float, 1>", (1, 17)),
("type Foo<float", (1, 15)),
("type Foo<float> { 3 }", (1, 19)),
(
"""
type Foo<float> {
foo
}
""",
(4, 1),
),
(
"""
type Foo<float> {
foo: 3
}
""",
(3, 10),
),
###
# Operation statements
###
("op", (1, 3)),
("op float", (1, 4)),
("op <", (1, 5)),
("op <float", (1, 10)),
("op <float>", (1, 11)),
("op <float> +", (1, 13)),
("op <float> + float", (1, 14)),
("op <float> + <", (1, 15)),
("op <float> + <float", (1, 20)),
("op <float> + <float>", (1, 21)),
("op <float> + <float> =", (1, 23)),
("op <float> + <float> = float", (1, 24)),
("op <float> + <float> = <", (1, 25)),
("op <float> + <float> = <float", (1, 30)),
("op <float + 3> + <float> = <float>", (1, 13)),
("op <float> + <float + 3> = <float>", (1, 23)),
("op <float> + <float> = <float + 3>", (1, 33)),
###
# Constraint statements
###
("constraint", (1, 11)),
("constraint 3", (1, 12)),
("constraint Foo", (1, 15)),
("constraint Foo =", (1, 17)),
("constraint Foo = 3", (1, 19)),
("constraint Foo = 3 <", (1, 21)),
],
)
def test_parsing_error(src: str, pos: tuple[int, int]):
    """Check that invalid source yields no statements and a located error.

    ``src`` is dedented, lexed with ``MidasLexer`` and parsed with
    ``MidasParser``. The parse must produce no statements and at least one
    error, and the token attached to the first error must sit at ``pos``,
    given as ``(line, column)``.
    """
    lexed: list[Token] = MidasLexer(textwrap.dedent(src)).process()
    midas_parser: MidasParser = MidasParser(lexed)
    parsed: list[Stmt] = midas_parser.parse()

    # Nothing may be produced, and the failure must have been recorded.
    assert len(parsed) == 0
    assert len(midas_parser.errors) != 0

    # Only the first reported error is compared with the expected position.
    first_error_at: Position = midas_parser.errors[0].token.position
    assert (first_error_at.line, first_error_at.column) == pos

View File

@@ -0,0 +1,19 @@
{
  "comments": {
    "lineComment": "//"
  },
  "brackets": [
    ["{", "}"],
    ["[", "]"],
    ["(", ")"],
    ["<", ">"]
  ],
  "autoClosingPairs": [
    { "open": "{", "close": "}" },
    { "open": "[", "close": "]" },
    { "open": "(", "close": ")" },
    { "open": "<", "close": ">" }
  ],
  "surroundingPairs": [
    ["{", "}"],
    ["[", "]"],
    ["(", ")"],
    ["<", ">"]
  ]
}

33
vscode-ext/package.json Normal file
View File

@@ -0,0 +1,33 @@
{
"name": "midas",
"version": "0.1.0",
"engines": {
"vscode": "*"
},
"categories": ["Programming Languages"],
"contributes": {
"languages": [
{
"id": "midas",
"extensions": [
".mpy",
".midas"
],
"aliases": [
"Midas"
],
"configuration": "./language-configuration.json"
}
],
"grammars": [
{
"language": "midas",
"scopeName": "source.midas",
"path": "./syntaxes/midas.tmLanguage.json",
"embeddedLanguages": {
"meta.embedded.block.python": "python"
}
}
]
}
}

View File

@@ -0,0 +1,135 @@
{
  "$schema": "https://raw.githubusercontent.com/martinring/tmlanguage/master/tmlanguage.json",
  "name": "Midas",
  "scopeName": "source.midas",
  "patterns": [{ "include": "#statement" }],
  "repository": {
    "comment": {
      "begin": "(//)",
      "end": "($)",
      "name": "comment.line.double-slash.midas",
      "beginCaptures": {
        "1": {
          "name": "punctuation.definition.comment.midas"
        }
      }
    },
    "type-def": {
      "begin": "\\b(type)\\s+([a-zA-Z_][a-zA-Z_\\d]*)",
      "end": "$",
      "beginCaptures": {
        "1": {
          "name": "keyword.control.type.midas"
        },
        "2": {
          "name": "variable.name"
        }
      },
      "patterns": [
        { "include": "#type-base" },
        { "include": "#type-body" }
      ]
    },
    "type-base": {
      "begin": "<",
      "end": ">",
      "beginCaptures": {
        "0": {
          "name": "punctuation.definition.base.begin.midas"
        }
      },
      "endCaptures": {
        "0": {
          "name": "punctuation.definition.base.end.midas"
        }
      },
      "patterns": [
        { "include": "source.python" }
      ]
    },
    "type-body": {
      "begin": "\\{",
      "end": "\\}",
      "beginCaptures": {
        "0": {
          "name": "punctuation.definition.type-body.begin.midas"
        }
      },
      "endCaptures": {
        "0": {
          "name": "punctuation.definition.type-body.end.midas"
        }
      },
      "patterns": [
        { "include": "#type-prop" }
      ]
    },
    "type-prop": {
      "match": "([a-zA-Z_][a-zA-Z_\\d]*)(:)\\s*([a-zA-Z_][a-zA-Z_\\d]*)",
      "captures": {
        "1": {
          "name": "variable.name"
        },
        "2": {
          "name": "punctuation.separator.annotation.midas"
        },
        "3": {
          "name": "meta.type.name"
        }
      }
    },
    "op-def": {
      "match": "\\b(op)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>\\s+(\\S+)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>\\s+(=)\\s+<([a-zA-Z_][a-zA-Z_\\d]*)>",
      "captures": {
        "1": {
          "name": "keyword.control.op.midas"
        },
        "2": {
          "name": "variable.name"
        },
        "3": {
          "name": "keyword.operator"
        },
        "4": {
          "name": "variable.name"
        },
        "5": {
          "name": "keyword.operator.assignment"
        },
        "6": {
          "name": "variable.name"
        }
      }
    },
    "constr-def": {
      "begin": "\\b(constraint)\\s+([a-zA-Z_][a-zA-Z_\\d]*)\\s*(=)",
      "end": "$",
      "contentName": "meta.embedded.block.python",
      "beginCaptures": {
        "1": {
          "name": "keyword.control.constr.midas"
        },
        "2": {
          "name": "variable.name"
        },
        "3": {
          "name": "keyword.operator.assignment"
        }
      },
      "patterns": [
        { "include": "source.python" }
      ]
    },
    "statement": {
      "patterns": [
        { "include": "#comment" },
        { "include": "#type-def" },
        { "include": "#op-def" },
        { "include": "#constr-def" }
      ]
    }
  }
}