From 1fc842e23f9b35cc8eab590ba519cc61ff885a26 Mon Sep 17 00:00:00 2001
From: LordBaryhobal <lordbaryhobal@gmail.com>
Date: Wed, 13 May 2026 22:06:32 +0200
Subject: [PATCH] feat(parser): add basic lexer for type definitions

---
 .../00_syntax_prototype/02_custom_types.midas |   2 +-
 lexer/annotations.py                          |   8 +-
 lexer/keyword.py                              |   9 ++
 lexer/midas.py                                | 126 ++++++++++++++++++
 lexer/token.py                                |  17 +++
 5 files changed, 157 insertions(+), 5 deletions(-)
 create mode 100644 lexer/keyword.py
 create mode 100644 lexer/midas.py
diff --git a/examples/00_syntax_prototype/02_custom_types.midas b/examples/00_syntax_prototype/02_custom_types.midas
index ba8b758..8248e16 100644
--- a/examples/00_syntax_prototype/02_custom_types.midas
+++ b/examples/00_syntax_prototype/02_custom_types.midas
@@ -21,4 +21,4 @@ type Age<int + (0 <= _ < 150)>
 // Predefined custom constraints that can be referenced in other definitions
 constraint Positive = _ >= 0
 constraint StrictlyPositive = _ > 0
-constraint Even = _ % 2 == 0
\ No newline at end of file
+//constraint Even = _ % 2 == 0
\ No newline at end of file
diff --git a/lexer/annotations.py b/lexer/annotations.py
index b8c7cf7..3cc0431 100644
--- a/lexer/annotations.py
+++ b/lexer/annotations.py
@@ -46,7 +46,7 @@ class AnnotationLexer(Lexer):
 
     def scan_number(self):
         """Scan the rest of number and add it as a token
-        
+
         This method handles both simple integers and floats. Scientific notation
         and base prefixes (0x, 0b, 0o) are not supported
         """
@@ -63,7 +63,7 @@ class AnnotationLexer(Lexer):
 
     def scan_identifier(self):
         """Scan the rest of an identifier and add it as a token
-        
+
         An identifier starts with a letter, followed by any number of
         alphanumerical characters or underscores
         """
@@ -73,8 +73,8 @@ class AnnotationLexer(Lexer):
 
     def scan_comment(self):
         """Scan the rest of a comment and add it as a token
-        
-        A comment starts with a '#' character and ends at the EOL/EOF
+
+        A comment starts with a `#` character and ends at the EOL/EOF
         """
         while self.peek() != "\n" and not self.is_at_end():
             self.advance()
diff --git a/lexer/keyword.py b/lexer/keyword.py
new file mode 100644
index 0000000..a4f03cf
--- /dev/null
+++ b/lexer/keyword.py
@@ -0,0 +1,11 @@
+from lexer.token import TokenType
+
+# Reserved words of the language, mapped to the token type that is used in
+# place of `IDENTIFIER` when a scanned identifier's lexeme matches one of them
+KEYWORDS: dict[str, TokenType] = {
+    "type": TokenType.TYPE,
+    "op": TokenType.OP,
+    "constraint": TokenType.CONSTRAINT,
+    "true": TokenType.TRUE,
+    "false": TokenType.FALSE,
+}
diff --git a/lexer/midas.py b/lexer/midas.py
new file mode 100644
index 0000000..16440da
--- /dev/null
+++ b/lexer/midas.py
@@ -0,0 +1,148 @@
+from lexer.base import Lexer
+from lexer.keyword import KEYWORDS
+from lexer.token import TokenType
+
+
+class MidasLexer(Lexer):
+    """Lexer for Midas type-definition sources
+
+    Whitespace runs, newlines and comments are added as tokens of their own
+    instead of being skipped
+    """
+
+    def scan_token(self) -> None:
+        """Scan a single token starting at the current position
+
+        Punctuation and operators are added directly, `self.match` being used
+        to recognise the two-character forms (`<=`, `>=`, `==`, `//`, `/*`).
+        Any other character starts a number or an identifier, or is reported
+        through `self.error`
+        """
+        char: str = self.advance()
+        match char:
+            case "(":
+                self.add_token(TokenType.LEFT_PAREN)
+            case ")":
+                self.add_token(TokenType.RIGHT_PAREN)
+            case "[":
+                self.add_token(TokenType.LEFT_BRACKET)
+            case "]":
+                self.add_token(TokenType.RIGHT_BRACKET)
+            case "{":
+                self.add_token(TokenType.LEFT_BRACE)
+            case "}":
+                self.add_token(TokenType.RIGHT_BRACE)
+            case "<":
+                self.add_token(
+                    TokenType.LESS_EQUAL if self.match("=") else TokenType.LESS
+                )
+            case ">":
+                self.add_token(
+                    TokenType.GREATER_EQUAL if self.match("=") else TokenType.GREATER
+                )
+            case "=":
+                self.add_token(
+                    TokenType.EQUAL_EQUAL if self.match("=") else TokenType.EQUAL
+                )
+            case ":":
+                self.add_token(TokenType.COLON)
+            case ",":
+                self.add_token(TokenType.COMMA)
+            case "_":
+                self.add_token(TokenType.UNDERSCORE)
+            case "+":
+                self.add_token(TokenType.PLUS)
+            case "-":
+                self.add_token(TokenType.MINUS)
+            case "*":
+                self.add_token(TokenType.STAR)
+            case "/":
+                if self.match("/"):
+                    self.scan_comment()
+                elif self.match("*"):
+                    self.scan_comment_multiline()
+                else:
+                    self.add_token(TokenType.SLASH)
+            case "\n":
+                self.add_token(TokenType.NEWLINE)
+            case " " | "\r" | "\t":
+                # Consume all whitespace characters until EOL or EOF
+                while (
+                    self.peek().isspace()
+                    and self.peek() != "\n"
+                    and not self.is_at_end()
+                ):
+                    self.advance()
+                self.add_token(TokenType.WHITESPACE)
+            case _:
+                # `isdecimal` and not `isdigit`: the latter is also true for
+                # characters such as `²`, which `float()` rejects with a
+                # ValueError in `scan_number`
+                if char.isdecimal():
+                    self.scan_number()
+                elif char.isalpha():
+                    self.scan_identifier()
+                else:
+                    self.error("Unexpected character")
+
+    def scan_number(self):
+        """Scan the rest of number and add it as a token
+
+        This method handles both simple integers and floats. Scientific notation
+        and base prefixes (0x, 0b, 0o) are not supported
+
+        Digits are matched with `str.isdecimal`, i.e. only characters that
+        `float()` is able to convert end up in the scanned lexeme
+        """
+        while self.peek().isdecimal():
+            self.advance()
+
+        if self.peek() == "." and self.peek_next().isdecimal():
+            self.advance()
+            while self.peek().isdecimal():
+                self.advance()
+
+        value: float = float(self.source[self.start : self.idx])
+        self.add_token(TokenType.NUMBER, value)
+
+    def scan_identifier(self):
+        """Scan the rest of an identifier and add it as a token
+
+        An identifier starts with a letter, followed by any number of
+        alphanumerical characters or underscores
+
+        Lexemes found in `KEYWORDS` get the matching keyword token type,
+        every other lexeme is added as an `IDENTIFIER`
+        """
+        while self.peek().isalnum() or self.peek() == "_":
+            self.advance()
+
+        lexeme: str = self.source[self.start : self.idx]
+        token_type: TokenType = KEYWORDS.get(lexeme, TokenType.IDENTIFIER)
+        self.add_token(token_type)
+
+    def scan_comment(self):
+        """Scan the rest of a comment and add it as a token
+
+        A comment starts with `//` and ends at the EOL/EOF
+        """
+        while self.peek() != "\n" and not self.is_at_end():
+            self.advance()
+        self.add_token(TokenType.COMMENT)
+
+    def scan_comment_multiline(self):
+        """Scan the rest of a multiline comment and add it as a token
+
+        A multiline comment starts with `/*` and ends with `*/` or at the EOF
+        """
+        while (
+            not (self.peek() == "*" and self.peek_next() == "/")
+            and not self.is_at_end()
+        ):
+            self.advance()
+        # Consume the closing `*/` unless the comment ran into the EOF
+        if not self.is_at_end():
+            self.advance()
+        if not self.is_at_end():
+            self.advance()
+        self.add_token(TokenType.COMMENT)
diff --git a/lexer/token.py b/lexer/token.py
index e06194c..9b5bc13 100644
--- a/lexer/token.py
+++ b/lexer/token.py
@@ -11,12 +11,23 @@ class TokenType(Enum):
     RIGHT_PAREN = auto()
     LEFT_BRACKET = auto()
     RIGHT_BRACKET = auto()
+    LEFT_BRACE = auto()
+    RIGHT_BRACE = auto()
     COLON = auto()
     COMMA = auto()
     UNDERSCORE = auto()
 
     # Operators
     PLUS = auto()
+    MINUS = auto()
+    STAR = auto()
+    SLASH = auto()
+    GREATER = auto()
+    GREATER_EQUAL = auto()
+    LESS = auto()
+    LESS_EQUAL = auto()
+    EQUAL = auto()
+    EQUAL_EQUAL = auto()
 
     # Literals
     IDENTIFIER = auto()
@@ -25,6 +36,11 @@ class TokenType(Enum):
     FALSE = auto()
     NONE = auto()
 
+    # Keywords
+    TYPE = auto()
+    OP = auto()
+    CONSTRAINT = auto()
+
     # Misc
     COMMENT = auto()
     WHITESPACE = auto()
@@ -35,6 +51,7 @@ class TokenType(Enum):
 @dataclass(frozen=True)
 class Token:
     """A scanned token"""
+
     type: TokenType
     lexeme: str
     value: Any