Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions example.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,7 @@
print(f"\n[{attn_type.upper()}] Parameters: {total:,}")

ids = torch.randint(0, cfg.vocab_size, (2, 16))
logits = model(ids, n_loops=4)
print(f"[{attn_type.upper()}] Logits shape: {logits.shape}")
logits, _ = model(ids, n_loops=4)

out = model.generate(ids, max_new_tokens=8, n_loops=8)
print(f"[{attn_type.upper()}] Generated shape: {out.shape}")
Expand Down
80 changes: 78 additions & 2 deletions open_mythos/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,37 @@
"""OpenMythos — Recurrent-Depth Transformer (100x Enhanced Edition).

An open-source implementation of the Claude Mythos Recurrent-Depth Transformer
architecture with major enhancements:

Architecture:
- Vectorized MoE dispatch (scatter/gather, 50-200x faster dispatch)
- NTK-aware RoPE scaling for context length extrapolation
- KV-cache eviction for unlimited context windows
- Gradient checkpointing for memory-efficient training

Generation:
- Nucleus (top-p) sampling
- Min-p sampling
- Repetition penalty
- Streaming generation (generate_stream)
- EOS token stopping

Training:
- Full Trainer with mixed precision (bf16/fp16/fp32)
- Cosine LR schedule with warmup
- Gradient accumulation + clipping
- Auto checkpoint save/resume
- WandB + TensorBoard logging
- DDP distributed training

Developer experience:
- Config validation with helpful error messages
- model.save() / OpenMythos.load()
- model.num_parameters() / parameter_summary()
- Benchmarking suite (throughput, latency, MoE entropy, ACT depth)
- torch.compile() compatible
"""

from open_mythos.main import (
ACTHalting,
Expert,
Expand Down Expand Up @@ -25,8 +59,32 @@
mythos_100b,
mythos_500b,
)
from open_mythos.training import (
TrainingConfig,
Trainer,
CheckpointManager,
MetricsTracker,
build_optimizer,
get_cosine_schedule_with_warmup,
simple_token_iterator,
compute_perplexity,
)
from open_mythos.bench import (
BenchResult,
benchmark_forward,
benchmark_generate,
analyze_routing_entropy,
analyze_act_depth,
run_quick_benchmark,
model_memory_mb,
)

__version__ = "1.0.0-enhanced"

__all__ = [
# Version
"__version__",
# Core model
"MythosConfig",
"RMSNorm",
"GQAttention",
Expand All @@ -39,17 +97,35 @@
"ACTHalting",
"RecurrentBlock",
"OpenMythos",
# RoPE utilities
"precompute_rope_freqs",
"apply_rope",
"loop_index_embedding",
# Model variants
"mythos_1b",
"mythos_3b",
"mythos_10b",
"mythos_50b",
"mythos_100b",
"mythos_500b",
"mythos_1t",
"load_tokenizer",
"get_vocab_size",
# Tokenizer
"MythosTokenizer",
# Training
"TrainingConfig",
"Trainer",
"CheckpointManager",
"MetricsTracker",
"build_optimizer",
"get_cosine_schedule_with_warmup",
"simple_token_iterator",
"compute_perplexity",
# Benchmarking
"BenchResult",
"benchmark_forward",
"benchmark_generate",
"analyze_routing_entropy",
"analyze_act_depth",
"run_quick_benchmark",
"model_memory_mb",
]
Loading