Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1,111 changes: 1,111 additions & 0 deletions docs/ANE_CHAINING_RESEARCH.md

Large diffs are not rendered by default.

563 changes: 563 additions & 0 deletions docs/ANE_INTERNALS.md

Large diffs are not rendered by default.

70 changes: 64 additions & 6 deletions training/Makefile
Original file line number Diff line number Diff line change
@@ -1,21 +1,36 @@
CC = xcrun clang
CFLAGS = -O2 -Wall -Wno-deprecated-declarations -fobjc-arc
CC_C = xcrun clang

ANE_COMPAT = -Wno-deprecated-declarations
SEC_FLAGS = -fstack-protector-strong -Wformat-security

CFLAGS = -O2 -Wall $(ANE_COMPAT) -fobjc-arc $(SEC_FLAGS)
CFLAGS_C = -O2 -Wall -Wextra -Werror -std=c11
CFLAGS_DEBUG = -O0 -g -Wall $(ANE_COMPAT) -fobjc-arc -fsanitize=address,undefined
FRAMEWORKS = -framework Foundation -framework CoreML -framework IOSurface
LDFLAGS = $(FRAMEWORKS) -ldl

HEADERS_LARGE = stories_config.h stories_io.h stories_mil.h stories_cpu_ops.h
HEADERS_LARGE = stories_config.h stories_io.h stories_mil.h stories_cpu_ops.h data_validation.h

HEADERS_ANE = $(HEADERS_LARGE) ane_rmsnorm_bwd.h ane_classifier.h

train: train.m ane_runtime.h ane_mil_gen.h model.h forward.h backward.h
$(CC) $(CFLAGS) -o $@ train.m $(LDFLAGS)
$(CC) $(CFLAGS) -o $@ train.m $(LDFLAGS) -framework Accelerate

train_large: train_large.m $(HEADERS_LARGE)
$(CC) $(CFLAGS) -o $@ train_large.m $(LDFLAGS) -framework Accelerate

train_large_ane: train_large_ane.m $(HEADERS_ANE)
$(CC) $(CFLAGS) -o $@ train_large_ane.m $(LDFLAGS) -framework Accelerate

HEADERS_OPT = $(HEADERS_LARGE) stories_cpu_ops_opt.h

train_opt: train_opt.m $(HEADERS_OPT)
$(CC) $(CFLAGS) -o $@ train_opt.m $(LDFLAGS) -framework Accelerate -framework Metal -framework MetalPerformanceShaders

train_double_buffer: train_double_buffer.m $(HEADERS_LARGE)
$(CC) $(CFLAGS) -o $@ train_double_buffer.m $(LDFLAGS) -framework Accelerate

PROBES = test_weight_reload test_perf_stats test_qos_sweep test_ane_advanced

test_rmsnorm_bwd: test_rmsnorm_bwd.m $(HEADERS_ANE)
Expand All @@ -36,13 +51,56 @@ test_qos_sweep: test_qos_sweep.m
test_ane_advanced: test_ane_advanced.m
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

test_chaining: test_chaining.m
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

test_chaining_v2: test_chaining_v2.m
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

test_bench_paths: test_bench_paths.m ane_runtime.h
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

test_ane_model: test_ane_model.m
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -framework Metal

test_throughput_ceiling: test_throughput_ceiling.m ane_runtime.h
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS)

test_coreml_chaining: test_coreml_chaining.m
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -framework Metal

test_e5_validate: test_e5_validate.m
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -framework Metal

test_mil_custom: test_mil_custom.m
$(CC) $(CFLAGS) -o $@ $< $(LDFLAGS) -framework Accelerate

test_data_validation: test_data_validation.c data_validation.h
$(CC_C) $(CFLAGS_C) -o $@ $<

probes: $(PROBES)

security-tests: test_data_validation

data: tokenize
@bash download_data.sh

tokenize:
python3 tokenize.py

clean:
rm -f train train_large train_large_ane $(PROBES) test_rmsnorm_bwd test_classifier
setup: data
@echo "=== Setup complete ==="
@echo "Data: tinystories_data00.bin"
@echo "To train: make train_large && ./train_large"
@echo "Override paths: ANE_MODEL_PATH=... ANE_DATA_PATH=... ./train_large"

verify-flags:
@echo "=== Active CFLAGS ==="
@echo "$(CFLAGS)"
@echo "=== Compiler version ==="
@xcrun clang --version

.PHONY: clean tokenize probes
clean:
rm -f train train_large train_large_ane train_opt train_double_buffer $(PROBES) test_rmsnorm_bwd test_classifier test_data_validation test_chaining test_chaining_v2 test_bench_paths test_ane_model test_throughput_ceiling test_coreml_chaining test_e5_validate test_mil_custom

.PHONY: clean tokenize probes security-tests verify-flags data setup
38 changes: 36 additions & 2 deletions training/ane_runtime.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,15 +20,33 @@ typedef struct {

static Class g_ANEDesc, g_ANEInMem, g_ANEReq, g_ANEIO;
static bool g_ane_loaded = false;
static id g_ane_client = nil;
static bool g_ane_ok = false;

/*
 * ane_init — lazy, one-shot setup of the private AppleNeuralEngine bindings.
 *
 * Steps:
 *   1. dlopen the private AppleNeuralEngine framework.
 *   2. Resolve the four private classes this file uses into
 *      g_ANEDesc / g_ANEInMem / g_ANEReq / g_ANEIO.
 *   3. Set g_ane_ok only if every class was found.
 *   4. Try to obtain the shared _ANEClient connection into g_ane_client.
 *      This step is optional: g_ane_client stays nil when the class or the
 *      selector is missing, and g_ane_ok is unaffected.
 *
 * Failures are reported on stderr and leave g_ane_ok false. g_ane_loaded is
 * set before any work is attempted, so a failed initialisation is not retried
 * on later calls; callers should test g_ane_ok rather than g_ane_loaded.
 */
static void ane_init(void) {
    if (g_ane_loaded) return;
    g_ane_loaded = true; // Set first to prevent re-entry (ref: CRIT-01)

    // Load the framework once and check the result. The handle is not closed
    // in this function.
    void *handle = dlopen(
        "/System/Library/PrivateFrameworks/AppleNeuralEngine.framework/AppleNeuralEngine",
        RTLD_NOW);
    if (!handle) {
        // dlerror() may return NULL; never hand NULL to a %s conversion.
        const char *reason = dlerror();
        fprintf(stderr, "ANE: dlopen failed: %s\n", reason ? reason : "unknown error");
        return;
    }

    g_ANEDesc  = NSClassFromString(@"_ANEInMemoryModelDescriptor");
    g_ANEInMem = NSClassFromString(@"_ANEInMemoryModel");
    g_ANEReq   = NSClassFromString(@"_ANERequest");
    g_ANEIO    = NSClassFromString(@"_ANEIOSurfaceObject");
    if (!g_ANEDesc || !g_ANEInMem || !g_ANEReq || !g_ANEIO) {
        fprintf(stderr, "ANE: Private classes not found (macOS version mismatch?)\n");
        return;
    }
    g_ane_ok = true;

    // Optional client lookup: verify the class actually responds before
    // messaging it, so a build that has _ANEClient but lacks the selector
    // leaves g_ane_client nil instead of raising an unrecognized-selector
    // exception.
    Class clientCls = NSClassFromString(@"_ANEClient");
    if (clientCls && [clientCls respondsToSelector:@selector(sharedConnection)]) {
        g_ane_client = [clientCls performSelector:@selector(sharedConnection)];
    }
}

static IOSurfaceRef ane_create_surface(size_t bytes) {
Expand All @@ -50,6 +68,7 @@ static ANEKernel *ane_compile(NSData *milText, NSData *weightData,
int nInputs, size_t *inputSizes,
int nOutputs, size_t *outputSizes) {
ane_init();
if (!g_ane_ok) { fprintf(stderr, "ANE: not available\n"); return NULL; } // CRIT-01/02
NSError *e = nil;

NSDictionary *wdict = nil;
Expand All @@ -63,6 +82,7 @@ static ANEKernel *ane_compile(NSData *milText, NSData *weightData,

id mdl = ((id(*)(Class,SEL,id))objc_msgSend)(
g_ANEInMem, @selector(inMemoryModelWithDescriptor:), desc);
if (!mdl) { fprintf(stderr, "ANE: inMemoryModel allocation failed\n"); return NULL; } // CRIT-02

// Pre-populate temp dir with MIL + weights
id hx = ((id(*)(id,SEL))objc_msgSend)(mdl, @selector(hexStringIdentifier));
Expand Down Expand Up @@ -151,6 +171,20 @@ static bool ane_eval(ANEKernel *k) {
return ok;
}

/*
 * ane_eval_rt — evaluate a kernel through the shared client's real-time path.
 *
 * When no client connection is available (g_ane_client is nil) the call is
 * delegated unchanged to ane_eval(k). Otherwise
 * evaluateRealTimeWithModel:options:request:error: is sent to g_ane_client
 * with k->model, an empty options dictionary and k->request.
 *
 * If the real-time evaluation reports failure, the error is printed to stderr
 * and the standard ane_eval(k) path is tried as a fallback.
 *
 * Returns true when the real-time evaluation succeeds; false when k is NULL
 * on the real-time path; otherwise the result of ane_eval(k).
 */
static bool ane_eval_rt(ANEKernel *k) {
    if (!g_ane_client) return ane_eval(k);

    // k->model / k->request are dereferenced below; ane_compile can return
    // NULL, so refuse a NULL kernel here instead of crashing.
    if (!k) return false;

    NSError *e = nil;
    BOOL ok = ((BOOL(*)(id,SEL,id,id,id,NSError**))objc_msgSend)(
        g_ane_client, @selector(evaluateRealTimeWithModel:options:request:error:),
        k->model, @{}, k->request, &e);
    if (ok) return true;

    fprintf(stderr, "ANE RT eval failed, falling back to standard: %s\n",
            e ? [[e description] UTF8String] : "unknown");
    return ane_eval(k);
}

static void ane_free(ANEKernel *k) {
if (!k) return;
NSError *e = nil;
Expand Down
Loading