Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 74 additions & 48 deletions training/Makefile
Original file line number Diff line number Diff line change
@@ -1,48 +1,74 @@
# Toolchain: clang resolved through xcrun.
CC := xcrun clang
# Compile flags shared by every build rule below.
CFLAGS := -O2 -Wall -Wno-deprecated-declarations -fobjc-arc

# Link inputs: the framework list, followed by libdl.
# LDFLAGS stays recursively expanded so it tracks FRAMEWORKS.
FRAMEWORKS := -framework Foundation -framework CoreML -framework IOSurface
LDFLAGS = $(FRAMEWORKS) -ldl

# Header groups used as prerequisites so that editing a header triggers a rebuild.
# HEADERS_ANE is HEADERS_LARGE plus the two ANE-specific headers.
HEADERS_LARGE := stories_config.h stories_io.h stories_mil.h stories_cpu_ops.h
HEADERS_ANE = $(HEADERS_LARGE) ane_rmsnorm_bwd.h ane_classifier.h

# Build the `train` binary from train.m.
# The listed headers are prerequisites only: a change to any of them forces a rebuild.
# $< is the first prerequisite (train.m), matching the style of the test rules.
train: train.m ane_runtime.h ane_mil_gen.h model.h forward.h backward.h
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

# Build the `train_large` binary from train_large.m; rebuilds when any header
# in HEADERS_LARGE changes. Links the Accelerate framework in addition to LDFLAGS.
train_large: train_large.m $(HEADERS_LARGE)
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -framework Accelerate

# Build the `train_large_ane` binary from train_large_ane.m; rebuilds when any
# header in HEADERS_ANE changes. Links the Accelerate framework in addition to LDFLAGS.
train_large_ane: train_large_ane.m $(HEADERS_ANE)
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -framework Accelerate

# Probe binaries: built together by the `probes` goal and removed by `clean`.
PROBES := test_weight_reload test_perf_stats test_qos_sweep test_ane_advanced

# Static pattern rule for the two ANE test binaries: each target is compiled
# from the same-named .m file ($<) and is rebuilt when any header in
# HEADERS_ANE changes. Both link Accelerate in addition to LDFLAGS.
test_rmsnorm_bwd test_classifier: %: %.m $(HEADERS_ANE)
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -framework Accelerate

# Static pattern rule covering every probe in PROBES: each probe is compiled
# from the same-named .m source ($<) with the shared flags and no extra
# frameworks. Adding a probe only requires extending PROBES.
$(PROBES): %: %.m
	$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

# Aggregate goal: build every probe binary listed in PROBES.
probes: $(PROBES)

# Run the tokenize.py script with python3 (no prerequisites; always runs).
tokenize:
	python3 tokenize.py

# Remove every binary this Makefile builds.
clean:
	rm -f train train_large train_large_ane $(PROBES) test_rmsnorm_bwd test_classifier

# These goals name commands, not files, so they must always be considered out of date.
.PHONY: clean tokenize probes

# Toolchain: clang resolved through xcrun.
CC := xcrun clang
# Base compile flags; every rule consumes them through UNIVERSAL_CFLAGS.
CFLAGS := -O2 -Wall -Wno-deprecated-declarations -fobjc-arc

# Link inputs: the framework list, followed by libdl.
# LDFLAGS stays recursively expanded so it tracks FRAMEWORKS.
FRAMEWORKS := -framework Foundation -framework CoreML -framework IOSurface
LDFLAGS = $(FRAMEWORKS) -ldl

# Architecture flags: target arm64, the architecture shared by M1 through M4.
# Note: this yields a single-architecture arm64 binary, not a multi-architecture
# ("universal"/fat) one, despite the UNIVERSAL_CFLAGS name.
ARCH_FLAGS := -arch arm64
# Kept recursively expanded so later changes to CFLAGS or ARCH_FLAGS are picked up.
UNIVERSAL_CFLAGS = $(CFLAGS) $(ARCH_FLAGS)

# Header dependency groups, used as prerequisites to trigger rebuilds.
HEADERS_CORE := ane_runtime.h ane_mil_gen.h model.h forward.h backward.h
HEADERS_COMPAT := ane_hw_detect.h ane_compat.h ane_mem_budget.h
HEADERS_LARGE := stories_config.h stories_io.h stories_mil.h stories_cpu_ops.h
# HEADERS_ANE is HEADERS_LARGE plus the two ANE-specific headers.
HEADERS_ANE = $(HEADERS_LARGE) ane_rmsnorm_bwd.h ane_classifier.h

# === Primary targets ===

# Build the `train` binary from train.m; rebuilds when any header in
# HEADERS_CORE changes. $< is the first prerequisite (train.m), matching the
# style of the test rules.
train: train.m $(HEADERS_CORE)
	$(CC) $(UNIVERSAL_CFLAGS) -o $@ $< $(LDFLAGS)

# Build the `train_large` binary from train_large.m; rebuilds when any header
# in HEADERS_LARGE changes. Links the Accelerate framework in addition to LDFLAGS.
train_large: train_large.m $(HEADERS_LARGE)
	$(CC) $(UNIVERSAL_CFLAGS) -o $@ $< $(LDFLAGS) -framework Accelerate

# Build the `train_large_ane` binary from train_large_ane.m; rebuilds when any
# header in HEADERS_ANE changes. Links the Accelerate framework in addition to LDFLAGS.
train_large_ane: train_large_ane.m $(HEADERS_ANE)
	$(CC) $(UNIVERSAL_CFLAGS) -o $@ $< $(LDFLAGS) -framework Accelerate

# === M1/M2/M3/M4 compatibility test ===

# Build the compatibility test binary from test_m2_compatibility.m ($<).
# Rebuilds when any header in HEADERS_CORE or HEADERS_COMPAT changes; links
# the Accelerate framework in addition to LDFLAGS.
test_m2_compatibility: test_m2_compatibility.m $(HEADERS_CORE) $(HEADERS_COMPAT)
	$(CC) $(UNIVERSAL_CFLAGS) -o $@ $< $(LDFLAGS) -framework Accelerate

# === Existing probes & tests ===

# Probe binaries: built together by the `probes` goal and removed by `clean`.
PROBES := test_weight_reload test_perf_stats test_qos_sweep test_ane_advanced

# Static pattern rule for the two ANE test binaries: each target is compiled
# from the same-named .m file ($<) and is rebuilt when any header in
# HEADERS_ANE changes. Both link Accelerate in addition to LDFLAGS.
test_rmsnorm_bwd test_classifier: %: %.m $(HEADERS_ANE)
	$(CC) $(UNIVERSAL_CFLAGS) -o $@ $< $(LDFLAGS) -framework Accelerate

# Static pattern rule covering every probe in PROBES: each probe is compiled
# from the same-named .m source ($<) with the shared flags and no extra
# frameworks. Adding a probe only requires extending PROBES.
$(PROBES): %: %.m
	$(CC) $(UNIVERSAL_CFLAGS) -o $@ $< $(LDFLAGS)

# Aggregate goal: build every probe binary listed in PROBES.
probes: $(PROBES)

# === Convenience targets ===

# Build the three training binaries plus the compatibility test.
# The probes and the rmsnorm/classifier tests are not part of this goal.
all: train train_large train_large_ane test_m2_compatibility

# Quick compatibility check (compile only, no run): succeeds once the
# compatibility test binary has been built, then prints a confirmation.
compat-check: test_m2_compatibility
	@echo "Compatibility test binary built OK for all Apple Silicon generations"

# Run the tokenize.py script with python3 (no prerequisites; always runs).
tokenize:
	python3 tokenize.py

# Remove every binary this Makefile builds.
clean:
	rm -f train train_large train_large_ane test_m2_compatibility \
		$(PROBES) test_rmsnorm_bwd test_classifier

# These goals name commands, not files, so they must always be considered out of date.
.PHONY: clean tokenize probes all compat-check

Loading