Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1 +1,7 @@
/rust/target/

# Python build / bytecode
__pycache__/
*.pyc
*.pyo
.pytest_cache/
75 changes: 75 additions & 0 deletions python/forge_alloy/domains/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
"""forge_alloy.domains — registered vocabularies for the universal core.

Each domain extension is a registered vocabulary for one universe of
data transformation pipelines. Adding a new domain (photo-provenance,
ticketing, delivery, compute-receipt, ...) is exactly one new file in
this package + one import line below. The universal forge_alloy.types
core never changes when a new domain ships.

Architectural rule: NEVER add domain-specific stage types or root
extension fields to forge_alloy/types.py. The dependency direction is
strict: extensions → core, never core → extensions. The bd4349d
checkpoint commit on the domain-extensibility-refactor branch was the
wrong-layered first attempt (ML fields bolted into the universal core);
this package is the correct layer.

Currently registered:
llm-forge ML model forging (the morning's qwen3-coder-30b-a3b
artifact and the rest of the continuum-ai/* catalog
all declare this domain implicitly)
photo-provenance stub — camera enclave → edits → publish chain
ticketing stub — venue tickets, FedEx delivery, concerts

Stubs exist as witnesses that the registry handles non-ML domains. When
real photo-provenance or ticketing artifacts ship, the stubs get filled
in with concrete Pydantic schemas.
"""

from .base import DomainExtension
from .registry import DomainRegistry

# Module-level singleton — the canonical registry the universal core
# composes its discriminated stage union from at validation time. Every
# module-level helper below (register/resolve/list) delegates to this
# one instance; there is deliberately no second registry.
_REGISTRY = DomainRegistry()


def register_domain(ext_class: type[DomainExtension]) -> type[DomainExtension]:
    """Register a DomainExtension subclass with the singleton registry.

    The registered class is handed back, so this can also be used as a
    class decorator on a DomainExtension subclass.
    """
    registered = _REGISTRY.register(ext_class)
    return registered


def resolve_domain(domain_id: str) -> DomainExtension:
    """Look up and instantiate the domain extension for *domain_id*.

    The universal core calls this while validating an alloy whose
    domains[] field declares this id. An unknown id raises KeyError
    with a message naming what IS registered — a loud failure that
    points straight at the missing extension file.
    """
    extension = _REGISTRY.resolve(domain_id)
    return extension


def registered_domains() -> list[str]:
    """Return every registered domain id string, sorted."""
    domain_ids = _REGISTRY.domains()
    return domain_ids


# Each concrete extension module is imported here and then explicitly
# registered through the public register_domain() helper (importing
# alone does NOT register anything — the calls below do). Order doesn't
# matter; the registry is keyed by id, not by import order.
# NEW domain = new module + new import line + new register call.
from . import llm_forge  # noqa: E402,F401
from . import photo_provenance  # noqa: E402,F401
from . import ticketing  # noqa: E402,F401

register_domain(llm_forge.LlmForgeDomain)
register_domain(photo_provenance.PhotoProvenanceDomain)
register_domain(ticketing.TicketingDomain)


# Public surface: the extension contract, the registry type, and the
# three helpers that wrap the module singleton. Concrete domain classes
# are reached through their own submodules (e.g. domains.llm_forge).
__all__ = [
    "DomainExtension",
    "DomainRegistry",
    "register_domain",
    "resolve_domain",
    "registered_domains",
]
65 changes: 65 additions & 0 deletions python/forge_alloy/domains/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""DomainExtension ABC — the contract every forge-alloy domain extension satisfies.

A domain extension is a registered vocabulary for one universe of data
transformation pipelines:

llm-forge ML model forging (prune, train, expert-prune, quant, eval, ...)
photo-provenance Camera enclave → edits → publish chain (capture, edit, publish)
ticketing Venue ticket batches (issued, transferred, scanned)
delivery Package waypoints (picked-up, in-transit, delivered)
compute-receipt Grid job receipts (job-submitted, completed, attested)

The universal forge-alloy core knows nothing about any specific domain.
It enforces the Merkle chain-of-custody walk and the integrity attestation
surface. The vocabulary for "what stages exist" comes from the registered
domain extensions, not from the core.

Each extension owns:
- id: a string the alloy's domains[] field carries
- stage_types(): dict of stage type name → Pydantic model class
(the schemas the alloy's stages[] entries validate against)
- root_extensions(): dict of root field name → Pydantic model class
(additional fields this domain adds at the alloy root)

Concrete extensions live in sibling files: llm_forge.py, photo_provenance.py,
ticketing.py, etc. Each registers itself with the singleton in __init__.py
on package import.
"""

from __future__ import annotations

from abc import ABC, abstractmethod
from typing import Any


class DomainExtension(ABC):
    """Abstract base for one domain's vocabulary.

    A concrete subclass supplies an ``id`` string plus the two registry
    methods below; the universal core composes its validation surface
    from whatever the registered extensions declare.
    """

    # The string the alloy's domains[] field carries. Subclasses MUST
    # override this; examples: 'llm-forge', 'photo-provenance',
    # 'ticketing', 'delivery'.
    id: str = ""

    @abstractmethod
    def stage_types(self) -> dict[str, type]:
        """Return this domain's stage type registry.

        Keys are stage type strings (the alloy's stages[].type field);
        values are Pydantic model classes validating each stage's
        params. The universal core's discriminated stage union is the
        union of every registered domain's stage_types().
        """
        ...

    @abstractmethod
    def root_extensions(self) -> dict[str, type]:
        """Return this domain's root-extension field registry.

        Keys are alloy-root field names (e.g. 'priorMetricBaselines',
        'calibrationCorpora'); values are Pydantic model classes.
        These fields belong to the domain; the universal core ignores
        them.
        """
        ...

    def __repr__(self) -> str:
        return "<{} id={!r}>".format(type(self).__name__, self.id)
162 changes: 162 additions & 0 deletions python/forge_alloy/domains/llm_forge.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
"""llm-forge — the ML model forging domain extension.

This domain owns the entire vocabulary for forging ML models: prune,
train, lora, expert-prune, expert-activation-profile, compensation-lora,
context-extend, modality, quant, eval, publish, package, deploy, deliver,
source-config. It also owns the §4.1.3.4 falsifiability anchor structures
(PriorMetricBaseline) and the §4.1.3.4.1 calibration-corpus discipline
gate structures (CalibrationCorpusRef).

Relationship to forge_alloy.types (the universal core):
The bd4349d checkpoint commit on this branch bolted ML-specific
fields directly into types.py — that was the wrong layer. The
correct architecture is: types.py is a domain-agnostic envelope
(Merkle chain of custody, integrity attestation, source/target,
publication metadata), and EVERY ML-specific concept lives here in
llm_forge.py.

Today, this module RE-EXPORTS the ML types from types.py to satisfy
the LlmForgeDomain.stage_types() contract while consumers (sentinel-ai,
Continuum's Factory widget) still import from forge_alloy directly.
The full extraction (moving the actual class definitions out of
types.py and into this file) is a follow-up commit that lands as
a pure refactor — every cached alloy still validates because the
re-exported names are identical.

The dependency direction is strict: extensions → core, never
core → extensions. types.py NEVER imports from forge_alloy.domains.
This is enforced by test_universal_core_does_not_import_llm_forge
in test_domain_extension_layout.py.

Reproducibility contract: this domain extension MUST stay frozen against
the published continuum-ai/* alloy catalog. New ML methodology arrives
as NEW stage types or NEW alloy field discriminators registered here,
NEVER as edits to existing type definitions. The 17 published artifacts
all validate against the current contract; any change that breaks even
one of them is wrong.
"""

from __future__ import annotations

from .base import DomainExtension

# Re-export from the universal core's current location. The class
# definitions live in forge_alloy/types.py today (the bd4349d checkpoint
# state); this module re-exports them so the public API surface is
# stable while the universal-core extraction lands as a separate
# refactor commit. Consumers can import from EITHER:
# from forge_alloy import ExpertPruneStage (legacy public API)
# from forge_alloy.domains.llm_forge import ExpertPruneStage (new path)
# Both resolve to the same class object today.
from ..types import (
# Stage types (transform, input, output, bookend)
SourceConfigStage,
PruneStage,
TrainStage,
LoRAStage,
CompactStage,
QuantStage,
PackageStage,
EvalStage,
PublishStage,
DeployStage,
ExpertPruneStage,
ExpertActivationProfileStage,
CompensationLoRAStage,
ContextExtendStage,
ModalityStage,
# Result types
BenchmarkResult,
BenchmarkDef,
HardwareProfile,
GenerationSample,
AlloyResults,
# § 4.1.3.4 falsifiability + discipline gate structures
PriorMetricBaseline,
CalibrationCorpusRef,
# Hardware tier
AlloyHardware,
)


class LlmForgeDomain(DomainExtension):
    """The llm-forge domain extension. Registered against id 'llm-forge'."""

    id = "llm-forge"

    def stage_types(self) -> dict[str, type]:
        """Stage types this domain owns.

        Used by the universal core's discriminated stage union when an
        alloy declares this domain in its domains[] field.
        """
        registry: dict[str, type] = {
            "source-config": SourceConfigStage,
            "prune": PruneStage,
            "train": TrainStage,
            "lora": LoRAStage,
            "compact": CompactStage,
            "quant": QuantStage,
            "package": PackageStage,
            "eval": EvalStage,
            "publish": PublishStage,
            "deploy": DeployStage,
            "expert-prune": ExpertPruneStage,
            "expert-activation-profile": ExpertActivationProfileStage,
            "compensation-lora": CompensationLoRAStage,
            "context-extend": ContextExtendStage,
            "modality": ModalityStage,
        }
        # 'deliver' is a legacy alias used by older alloys for what is
        # now called 'publish' — both resolve to PublishStage so the
        # legacy alloys keep validating without a separate stage class.
        registry["deliver"] = registry["publish"]
        return registry

    def root_extensions(self) -> dict[str, type]:
        """Root-extension fields this domain adds to the alloy root.

        These are the §4.1.3.4 / §4.1.3.4.1 structures from the
        methodology paper:

        calibrationCorpora      list[CalibrationCorpusRef]
                                hash-pinned calibration corpora used by
                                any expert-activation-profile or
                                compensation-lora stage in this alloy
        priorMetricBaselines    list[PriorMetricBaseline]
                                superseded forge attempts preserved as
                                falsifiability anchors (the §4.1.3.4
                                negative-baseline pattern)
        """
        fields: dict[str, type] = {}
        fields["calibrationCorpora"] = CalibrationCorpusRef
        fields["priorMetricBaselines"] = PriorMetricBaseline
        return fields


# Public surface of this module: the domain class plus every ML type
# re-exported from the universal core (see the re-export note in the
# module docstring — these resolve to the same objects as forge_alloy's
# legacy public API today).
__all__ = [
    "LlmForgeDomain",
    # Stage types (re-exported for callers that import from this module)
    "SourceConfigStage",
    "PruneStage",
    "TrainStage",
    "LoRAStage",
    "CompactStage",
    "QuantStage",
    "PackageStage",
    "EvalStage",
    "PublishStage",
    "DeployStage",
    "ExpertPruneStage",
    "ExpertActivationProfileStage",
    "CompensationLoRAStage",
    "ContextExtendStage",
    "ModalityStage",
    # Result types
    "BenchmarkResult",
    "BenchmarkDef",
    "HardwareProfile",
    "GenerationSample",
    "AlloyResults",
    "AlloyHardware",
    # § 4.1.3.4 structures
    "PriorMetricBaseline",
    "CalibrationCorpusRef",
]
63 changes: 63 additions & 0 deletions python/forge_alloy/domains/photo_provenance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""photo-provenance — Camera enclave → edits → publish chain of custody.

Stub domain that proves the registry mechanism is genuinely non-ML. The
photo-provenance use case from forge-alloy's APPLICATIONS.md:

A camera enclave signs the original capture (capture stage), every
edit in Photoshop / Lightroom / Affinity Photo records a signed
edit stage with the operation type and the editor's enclave key,
and the publish step on social media records the final stage with
the QR code embedded in EXIF. Anyone with the alloy can walk the
full chain of custody from sensor to feed and verify cryptographically
that no edit happened off-chain.

The actual stage schemas are placeholders today — when the first real
photo-provenance use case ships, this file gets the real Pydantic models
for capture / edit / publish stages and a real CameraAttestation +
EditAttestation root extension. For now the stub is a witness that the
registry handles non-ML domains without any change to the universal core
or to llm_forge.

Reproducibility contract: photo-provenance alloys are NOT in the test
catalog yet (no published artifacts use this domain). When they ship,
add them to the regression test alongside the continuum-ai/* alloys.
"""

from __future__ import annotations

from .base import DomainExtension


class PhotoProvenanceDomain(DomainExtension):
    """photo-provenance domain extension. Registered against id 'photo-provenance'."""

    id = "photo-provenance"

    def stage_types(self) -> dict[str, type]:
        """Stage types this domain owns. Currently empty stubs.

        Real schemas would be:
            capture   → CameraCaptureStage
                        (sensorId, gpsHash, exif, signature)
            edit      → PhotoEditStage
                        (tool, operation, parameters, signature)
            publish   → PhotoPublishStage
                        (platform, postId, qrEmbed, signature)

        The stage type strings are placeholders pending the first real
        photo-provenance artifact's schema.
        """
        # Placeholder — concrete schemas land when the first
        # photo-provenance artifact ships.
        placeholder: dict[str, type] = {}
        return placeholder

    def root_extensions(self) -> dict[str, type]:
        """Root-extension fields. Currently empty.

        Future:
            cameraAttestation → CameraAttestation
                                (enclave certificate, public key,
                                attestation timestamp)
        """
        return dict()
Loading