Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 26 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,13 @@ type OrderItem {
field unit_price: Decimal
}

enum OrderStatus { Pending Confirmed Shipped Delivered Cancelled }
enum OrderStatus {
Pending
Confirmed
Shipped
Delivered
Cancelled
}

interface OrderRequest {
field order_id: String
Expand Down Expand Up @@ -94,9 +100,14 @@ system ECommerce {
provides InventoryStatus
}

connect OrderService -> PaymentGateway by PaymentRequest
connect OrderService -> InventoryManager by InventoryCheck { protocol = "HTTP" }
connect PaymentGateway -> StripeAPI by PaymentRequest { protocol = "HTTP" async = true }
connect OrderService -> PaymentGateway by PaymentRequest
connect OrderService -> InventoryManager by InventoryCheck {
protocol = "HTTP"
}
connect PaymentGateway -> StripeAPI by PaymentRequest {
protocol = "HTTP"
async = true
}
}
```

Expand All @@ -123,6 +134,17 @@ Primitive types: `String`, `Int`, `Float`, `Decimal`, `Bool`, `Bytes`, `Timestam
Container types: `List<T>`, `Map<K, V>`, `Optional<T>`
Filesystem types: `File` (with `filetype`, `schema`), `Directory` (with `schema`)

Multi-line descriptions use triple-quoted strings:

```
description = """
Accepts and validates customer orders.
Delegates payment to PaymentGateway.
"""
```

Each enum value and each connection block attribute must be on its own line; no commas are needed.

Full syntax reference: [docs/LANGUAGE_SYNTAX.md](docs/LANGUAGE_SYNTAX.md)

## Installation
Expand Down
17 changes: 15 additions & 2 deletions docs/LANGUAGE_SYNTAX.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,18 @@ ArchML files use the `.archml` extension. A file contains one or more top-level

Strings use double quotes. Identifiers are unquoted alphanumeric names with underscores (e.g., `order_service`). Every named entity has an optional human-readable `title` and `description`.

Multi-line text is written with triple-quoted strings (`"""`):

```
description = """
Accepts and validates customer orders.
Delegates payment processing to the PaymentGateway
and inventory checks to the InventoryManager.
"""
```

Regular strings, delimited by a single pair of double quotes (`"..."`), may not contain a literal newline character, but they support the same `\n`, `\t`, `\\`, `\"` escape sequences as triple-quoted strings.

## Type System

### Primitive Types
Expand Down Expand Up @@ -53,7 +65,7 @@ field artifact: Directory {

### Enumerations

The `enum` keyword defines a constrained set of named values:
The `enum` keyword defines a constrained set of named values. Each value must appear on its own line:

```
enum OrderStatus {
Expand Down Expand Up @@ -222,7 +234,7 @@ connect ServiceA -> ServiceB by RequestToB
connect ServiceB -> ServiceA by ResponseToA
```

Connections may carry annotations:
Connections may carry annotations. Each attribute must appear on its own line:

```
connect OrderService -> PaymentGateway by PaymentRequest {
Expand Down Expand Up @@ -463,6 +475,7 @@ system ECommerce {
connect PaymentGateway -> StripeAPI by PaymentRequest {
protocol = "HTTP"
async = true
description = "Delegate payment processing to Stripe."
}
}
```
Expand Down
37 changes: 32 additions & 5 deletions src/archml/compiler/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,11 +268,16 @@ def _parse_identifier_list(self) -> list[str]:
# ------------------------------------------------------------------

def _parse_enum(self) -> EnumDef:
"""Parse: enum <Name> { [attrs] <Value>* }"""
"""Parse: enum <Name> { [attrs] <Value>* }

Each enum value must appear on its own line (line number strictly
greater than the opening brace or the previous value).
"""
self._expect(TokenType.ENUM)
name_tok = self._expect(TokenType.IDENTIFIER)
self._expect(TokenType.LBRACE)
lbrace = self._expect(TokenType.LBRACE)
enum_def = EnumDef(name=name_tok.value)
last_value_line = lbrace.line
while not self._check(TokenType.RBRACE, TokenType.EOF):
if self._check(TokenType.TITLE):
enum_def.title = self._parse_string_attr(TokenType.TITLE)
Expand All @@ -281,7 +286,15 @@ def _parse_enum(self) -> EnumDef:
elif self._check(TokenType.TAGS):
enum_def.tags = self._parse_tags()
elif self._check(TokenType.IDENTIFIER):
value_tok = self._advance()
value_tok = self._current()
if value_tok.line <= last_value_line:
raise ParseError(
f"Enum value {value_tok.value!r} must be on a new line",
value_tok.line,
value_tok.column,
)
last_value_line = value_tok.line
self._advance()
enum_def.values.append(value_tok.value)
else:
tok = self._current()
Expand Down Expand Up @@ -482,7 +495,12 @@ def _parse_use_statement(self, system: System) -> None:
# ------------------------------------------------------------------

def _parse_connection(self) -> Connection:
"""Parse: connect <source> -> <target> by <interface> [@version] [{ ... }]"""
"""Parse: connect <source> -> <target> by <interface> [@version] [{ ... }]

Each attribute inside the annotation block must appear on its own line
(line number strictly greater than the opening brace or the previous
attribute's last token).
"""
self._expect(TokenType.CONNECT)
source_tok = self._expect(TokenType.IDENTIFIER)
self._expect(TokenType.ARROW)
Expand All @@ -500,9 +518,18 @@ def _parse_connection(self) -> Connection:
interface=InterfaceRef(name=iface_name_tok.value, version=version),
)
if self._check(TokenType.LBRACE):
self._advance() # consume {
lbrace = self._advance() # consume {
last_attr_line = lbrace.line
while not self._check(TokenType.RBRACE, TokenType.EOF):
attr_tok = self._current()
if attr_tok.line <= last_attr_line:
raise ParseError(
"Connection attributes must each be on a new line",
attr_tok.line,
attr_tok.column,
)
self._parse_connection_attr(conn)
last_attr_line = self._tokens[self._pos - 1].line
self._expect(TokenType.RBRACE)
return conn

Expand Down
65 changes: 64 additions & 1 deletion src/archml/compiler/scanner.py
Original file line number Diff line number Diff line change
Expand Up @@ -278,8 +278,25 @@ def _scan_token(self) -> None:
# ------------------------------------------------------------------

def _scan_string(self, line: int, col: int) -> None:
"""Scan a double-quoted string literal with escape sequences."""
"""Scan a double-quoted string literal.

Triple-quoted strings (\"\"\"...\"\"\") allow literal newlines in the
content and are used for multi-line descriptions. Single-quoted
strings support \\n, \\t, \\\\, and \\\" escape sequences but may not
contain a literal newline character.
"""
self._advance() # opening "
# Check for triple-quoted string: "" or """
if self._pos < len(self._source) and self._current() == '"':
self._advance() # second "
if self._pos < len(self._source) and self._current() == '"':
self._advance() # third "
self._scan_triple_quoted_string(line, col)
return
# Two quotes consumed → empty string
self._tokens.append(Token(TokenType.STRING, "", line, col))
return
# Regular single-quoted string
chars: list[str] = []
while self._pos < len(self._source):
ch = self._current()
Expand Down Expand Up @@ -314,6 +331,52 @@ def _scan_string(self, line: int, col: int) -> None:
self._advance()
raise LexerError("Unterminated string literal", line, col)

def _scan_triple_quoted_string(self, line: int, col: int) -> None:
    """Scan the body of a triple-quoted string and emit a STRING token.

    Scanning starts at the current position and runs until three
    consecutive double-quote characters are found. Literal newlines are
    kept as part of the value. The backslash escapes for newline, tab,
    backslash and double quote are decoded; any other escape is rejected.

    Args:
        line: Line number stored on the emitted token and reported by the
            "unterminated" errors.
        col: Column number stored on the emitted token and reported by the
            "unterminated" errors.

    Raises:
        LexerError: If an unknown escape sequence is encountered, or if the
            input ends before the closing triple quote.
    """
    escapes = {"n": "\n", "t": "\t", "\\": "\\", '"': '"'}
    buf: list[str] = []
    while self._pos < len(self._source):
        current = self._current()
        if current == '"' and self._source.startswith('""', self._pos + 1):
            # Closing delimiter: consume all three quotes, then emit the token.
            for _ in range(3):
                self._advance()
            self._tokens.append(Token(TokenType.STRING, "".join(buf), line, col))
            return
        if current != "\\":
            # Ordinary character (including a literal newline): keep verbatim.
            buf.append(current)
            self._advance()
            continue
        self._advance()  # step over the backslash
        if self._pos >= len(self._source):
            raise LexerError("Unterminated triple-quoted string literal", line, col)
        esc = self._current()
        if esc not in escapes:
            # Reported at the scanner's current position, not the string start.
            raise LexerError(
                f"Invalid escape sequence: '\\{esc}'",
                self._line,
                self._column,
            )
        buf.append(escapes[esc])
        self._advance()
    raise LexerError("Unterminated triple-quoted string literal", line, col)

def _scan_number(self, line: int, col: int) -> None:
"""Scan an integer or floating-point literal.

Expand Down
12 changes: 6 additions & 6 deletions src/archml/views/diagram.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ def render_diagram(data: DiagramData, output_path: Path) -> None:
# Custom node classes are defined locally to keep the diagrams import lazy
# (it is an optional dependency that requires Graphviz to be installed).

class _TerminalNode(Node): # type: ignore[misc]
class _TerminalNode(Node):
"""Styled box for an interface terminal (requires or provides)."""

_icon_dir = None
Expand All @@ -187,7 +187,7 @@ class _TerminalNode(Node): # type: ignore[misc]
"penwidth": "1.5",
}

class _EntityNode(Node): # type: ignore[misc]
class _EntityNode(Node):
"""Styled box for a leaf entity (component or system with no children)."""

_icon_dir = None
Expand All @@ -201,7 +201,7 @@ class _EntityNode(Node): # type: ignore[misc]
"penwidth": "2",
}

class _ChildNode(Node): # type: ignore[misc]
class _ChildNode(Node):
"""Styled box for a child component or system inside an entity cluster."""

_icon_dir = None
Expand Down Expand Up @@ -234,7 +234,7 @@ class _ChildNode(Node): # type: ignore[misc]
# that cross-cluster edges are handled correctly by Graphviz).
for conn in data.connections:
if conn.source in child_nodes and conn.target in child_nodes:
child_nodes[conn.source] >> Edge(label=conn.label) >> child_nodes[conn.target] # type: ignore[operator]
child_nodes[conn.source] >> Edge(label=conn.label) >> child_nodes[conn.target]

# Children with no incoming internal connection are natural entry
# points for requires terminals; those with no outgoing connection
Expand All @@ -259,11 +259,11 @@ class _ChildNode(Node): # type: ignore[misc]
# --- Terminal ↔ entity edges ---
for req_node in req_nodes.values():
for entry in entry_nodes:
req_node >> Edge() >> entry # type: ignore[operator]
req_node >> Edge() >> entry

for prov_node in prov_nodes.values():
for exit_node in exit_nodes:
exit_node >> Edge() >> prov_node # type: ignore[operator]
exit_node >> Edge() >> prov_node


# ################
Expand Down
5 changes: 4 additions & 1 deletion tests/compiler/test_artifact.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,10 @@ def test_artifact_file_is_valid_json(self, tmp_path: Path) -> None:
def test_roundtrip_parsed_file(self, tmp_path: Path) -> None:
"""Parsing a real .archml file and roundtripping through artifact."""
source = """
enum Status { Active Inactive }
enum Status {
Active
Inactive
}

type Config {
field timeout: Int
Expand Down
14 changes: 11 additions & 3 deletions tests/compiler/test_build.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,11 @@ def test_compiles_file_with_enum_and_type(self, tmp_path: Path) -> None:
_write(
src / "types.archml",
"""
enum Color { Red Green Blue }
enum Color {
Red
Green
Blue
}
type Point { field x: Int field y: Int }
""",
)
Expand Down Expand Up @@ -621,8 +625,12 @@ def test_multiple_semantic_errors_in_message(self, tmp_path: Path) -> None:
_write(
src / "bad.archml",
"""
enum Dup { A }
enum Dup { B }
enum Dup {
A
}
enum Dup {
B
}
""",
)
with pytest.raises(CompilerError, match="Semantic errors"):
Expand Down
8 changes: 6 additions & 2 deletions tests/compiler/test_compiler_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,8 +337,12 @@ def test_large_types_file_parses_and_passes(self) -> None:
def test_multiple_errors_in_one_file(self) -> None:
"""A file with many problems should report all of them, not just the first."""
source = """
enum Dup { A }
enum Dup { B }
enum Dup {
A
}
enum Dup {
B
}

type Bad { field x: UnknownType }

Expand Down
Loading