exrlm/docs/GUIDE.html at main · errantsky/exrlm · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>RLM Engine &mdash; Architecture &amp; Reference Guide</title>
<style>
  /* ------------------------------------------------------------------ Reset */
  /* Border-box sizing and zeroed spacing for every element and pseudo-element */
  *,
  *::before,
  *::after {
    margin: 0;
    padding: 0;
    box-sizing: border-box;
  }

  /* --------------------------------------------------------- Design tokens */
  /* Custom properties shared by the rules in this stylesheet. */
  :root {
    /* Page background, card surfaces, table-header fill, and border shades */
    --bg:           #f8f9fb;
    --surface:      #ffffff;
    --surface-alt:  #f0f2f5;
    --border:       #d8dde6;
    --border-light: #e8ecf0;
    /* Body text and de-emphasised text */
    --text:         #1a1e27;
    --text-muted:   #5a6478;
    /* Purple accent: headings, inline code, callouts */
    --accent:       #5b21b6;
    --accent-mid:   #7c3aed;
    --accent-light: #ede9fe;
    /* Status colours: each strong tone is paired with a pale tint
       (used as text/background by the .badge-* rules) */
    --green:        #15803d;
    --green-light:  #dcfce7;
    --blue:         #1d4ed8;
    --blue-light:   #dbeafe;
    --amber:        #b45309;
    --amber-light:  #fef3c7;
    --red:          #b91c1c;
    --red-light:    #fee2e2;
    --teal:         #0e7490;
    --teal-light:   #cffafe;
    /* Dark palette for <pre> blocks and .diagram */
    --code-bg:      #1e2130;
    --code-text:    #e2e8f0;
    /* Corner radii */
    --radius:       8px;
    --radius-lg:    12px;
    /* Font stacks */
    --mono:         "JetBrains Mono", "Fira Code", "Cascadia Code", ui-monospace, "SFMono-Regular", Menlo, monospace;
    --sans:         -apple-system, BlinkMacSystemFont, "Segoe UI", Roboto, "Helvetica Neue", Arial, sans-serif;
  }

  /* --------------------------------------------------------------- Layout */
  /* Smooth scrolling for the in-page nav links. scroll-padding-top keeps the
     target section heading clear of the sticky nav bar when jumping to an
     anchor (52px nav height + 16px breathing room). */
  html {
    scroll-behavior: smooth;
    scroll-padding-top: 68px;
  }
  /* Respect the user's reduced-motion preference: jump instead of animating */
  @media (prefers-reduced-motion: reduce) {
    html { scroll-behavior: auto; }
  }
  /* Base typography and page colours */
  body {
    font-family: var(--sans);
    font-size: 15px;
    line-height: 1.65;
    color: var(--text);
    background: var(--bg);
  }

  /* ------------------------------------------------------------ Navigation */
  /* Sticky dark bar holding the in-page section links; it scrolls
     horizontally when the links do not fit the viewport width. */
  nav {
    position: sticky;
    top: 0;
    z-index: 100;
    background: #1a1028;
    border-bottom: 1px solid #2d1f4a;
    padding: 0 32px;
    display: flex;
    align-items: center;
    gap: 0;
    height: 52px;
    overflow-x: auto;
  }
  /* Each link fills the bar's height so the bottom border acts as an underline */
  nav a {
    color: #c4b5fd;
    text-decoration: none;
    font-size: 13px;
    font-weight: 500;
    white-space: nowrap;
    padding: 0 14px;
    height: 52px;
    display: flex;
    align-items: center;
    border-bottom: 2px solid transparent;
    transition: color 0.15s, border-color 0.15s;
  }
  /* Keyboard focus gets the same highlight as mouse hover */
  nav a:hover,
  nav a:focus-visible { color: #f5f3ff; border-bottom-color: #7c3aed; }
  /* nav is a scroll container (overflow-x: auto), so a focus ring drawn
     outside the full-height links would be clipped; draw it inset instead. */
  nav a:focus-visible { outline: 2px solid #c4b5fd; outline-offset: -2px; }
  /* Brand link: bolder, never underlined, never shrinks when the bar overflows */
  nav .nav-brand {
    color: #f5f3ff;
    font-weight: 700;
    font-size: 14px;
    margin-right: 16px;
    padding-left: 0;
    border-bottom: none;
    flex-shrink: 0;
  }
  nav .nav-brand:hover { border-bottom: none; }

  /* --------------------------------------------------------- Page header */
  /* Dark gradient banner; positioned so the decorative glow can be placed
     absolutely and clipped to the banner's bounds. */
  .page-header {
    position: relative;
    overflow: hidden;
    padding: 64px 48px 56px;
    color: white;
    background: linear-gradient(135deg, #1a1028 0%, #2d1b69 50%, #1e1040 100%);
  }

  /* Decorative purple glow in the upper-right corner; ignores pointer input */
  .page-header::before {
    content: "";
    position: absolute;
    top: -50%;
    right: -20%;
    width: 600px;
    height: 600px;
    pointer-events: none;
    background: radial-gradient(circle, rgba(124,58,237,0.15) 0%, transparent 70%);
  }

  .page-header h1 {
    margin-bottom: 8px;
    font-size: 2.4rem;
    font-weight: 800;
  }

  .page-header .subtitle {
    max-width: 700px;
    font-size: 1.05rem;
    color: #c4b5fd;
  }

  /* ------------------------------------------------------------ Sections */
  /* Centred content column */
  .content {
    max-width: 1100px;
    margin: 0 auto;
    padding: 32px 48px 80px;
  }

  section {
    margin-bottom: 48px;
  }

  /* Section title: accent colour with a pale underline rule */
  h2 {
    margin-bottom: 20px;
    padding-bottom: 8px;
    border-bottom: 2px solid var(--accent-light);
    font-size: 1.5rem;
    font-weight: 700;
    color: var(--accent);
  }

  h3 {
    margin: 20px 0 10px;
    font-size: 1.15rem;
    font-weight: 600;
    color: var(--text);
  }

  h4 {
    margin: 16px 0 8px;
    font-size: 1rem;
    font-weight: 600;
    color: var(--text-muted);
  }

  /* Vertical rhythm for body copy and lists (margins were zeroed by the reset) */
  p {
    margin-bottom: 12px;
  }

  ul,
  ol {
    margin-bottom: 12px;
    padding-left: 1.5rem;
  }

  li {
    margin-bottom: 6px;
  }

  /* ------------------------------------------------------------- Cards */
  /* Full-width content panel */
  .card {
    margin-bottom: 16px;
    padding: 24px;
    border: 1px solid var(--border);
    border-radius: var(--radius-lg);
    background: var(--surface);
  }

  /* Grid of compact cards: as many columns of at least 300px as will fit */
  .card-grid {
    display: grid;
    gap: 16px;
    grid-template-columns: repeat(auto-fill, minmax(300px, 1fr));
    margin-bottom: 16px;
  }

  /* Compact card */
  .card-sm {
    padding: 16px;
    border: 1px solid var(--border);
    border-radius: var(--radius);
    background: var(--surface);
  }

  .card-sm h4 {
    margin-top: 0;
    font-size: 0.95rem;
    color: var(--accent);
  }

  .card-sm p {
    margin-bottom: 0;
    font-size: 0.9rem;
    color: var(--text-muted);
  }

  /* ------------------------------------------------------------- Tables */
  table { width: 100%; border-collapse: collapse; margin-bottom: 16px; font-size: 0.92rem; }
  th, td { text-align: left; padding: 10px 14px; border-bottom: 1px solid var(--border-light); }
  /* Header cells: small uppercase labels on the alternate surface colour */
  th { background: var(--surface-alt); color: var(--accent); font-weight: 600; font-size: 0.82rem; text-transform: uppercase; letter-spacing: 0.04em; }
  td { vertical-align: top; }
  /* Row hover highlight. Uses an opaque tint: an alpha-suffixed value such as
     #f5f3ff08 (~3% opacity) is imperceptible on the light page background. */
  tr:hover td { background: #f5f3ff; }

  /* ------------------------------------------------------------- Code */
  /* Inline code: accent-coloured pill */
  code {
    padding: 2px 6px;
    border-radius: 4px;
    font-family: var(--mono);
    font-size: 0.88em;
    color: var(--accent);
    background: var(--accent-light);
  }

  /* Code block: dark panel that scrolls horizontally instead of wrapping */
  pre {
    margin-bottom: 16px;
    padding: 20px 24px;
    border-radius: var(--radius);
    overflow-x: auto;
    font-family: var(--mono);
    font-size: 0.85rem;
    line-height: 1.55;
    color: var(--code-text);
    background: var(--code-bg);
  }

  /* Inside a <pre>, drop the inline pill styling and inherit the block's look */
  pre code {
    padding: 0;
    font-size: inherit;
    color: inherit;
    background: none;
  }

  /* ----------------------------------------------------------- Diagram */
  /* Text diagram panel: white-space is preserved so box-drawing characters
     line up, and wide diagrams scroll horizontally. */
  .diagram {
    margin-bottom: 16px;
    padding: 20px 24px;
    border: 1px solid #2a2d42;
    border-radius: var(--radius);
    overflow-x: auto;
    white-space: pre;
    font-family: var(--mono);
    font-size: 0.85rem;
    line-height: 1.45;
    color: var(--code-text);
    background: var(--code-bg);
  }

  /* ----------------------------------------------------------- Badges */
  /* Base pill shape; pair with one of the colour modifiers below */
  .badge {
    display: inline-block;
    padding: 3px 10px;
    border-radius: 12px;
    font-size: 0.78rem;
    font-weight: 600;
    letter-spacing: 0.02em;
  }

  /* Colour modifiers: strong tone for text, matching pale tint for background */
  .badge-green {
    color: var(--green);
    background: var(--green-light);
  }
  .badge-blue {
    color: var(--blue);
    background: var(--blue-light);
  }
  .badge-amber {
    color: var(--amber);
    background: var(--amber-light);
  }
  .badge-red {
    color: var(--red);
    background: var(--red-light);
  }
  .badge-teal {
    color: var(--teal);
    background: var(--teal-light);
  }
  .badge-purple {
    color: var(--accent);
    background: var(--accent-light);
  }

  /* --------------------------------------------------------- Callout */
  /* Highlighted note: thick left rule, rounded on the right side only */
  .callout {
    margin-bottom: 16px;
    padding: 16px 20px;
    border-left: 4px solid var(--accent-mid);
    border-radius: 0 var(--radius) var(--radius) 0;
    font-size: 0.93rem;
    background: var(--accent-light);
  }
  .callout strong {
    color: var(--accent);
  }

  /* Amber variant: same shape, amber rule, tint and emphasis colour */
  .callout-amber {
    background: var(--amber-light);
    border-left-color: var(--amber);
  }
  .callout-amber strong {
    color: var(--amber);
  }

  /* ---------------------------------------------------- Responsive */
  /* Narrow viewports: tighter padding, smaller title, single-column card grid */
  @media (max-width: 768px) {
    nav {
      padding: 0 16px;
    }
    .page-header {
      padding: 40px 24px 32px;
    }
    .page-header h1 {
      font-size: 1.8rem;
    }
    .content {
      padding: 24px 20px 60px;
    }
    .card-grid {
      grid-template-columns: 1fr;
    }
  }

  /* --------------------------------------------------------- Step list */
  /* Ordered list whose numbers come from a CSS counter, drawn as round badges */
  .step-list {
    padding-left: 0;
    list-style: none;
    counter-reset: steps;
  }
  .step-list li {
    position: relative;
    margin-bottom: 14px;
    padding-left: 36px;
    counter-increment: steps;
  }
  /* The badge: a 24px circle pinned to the left of the item, number centred */
  .step-list li::before {
    content: counter(steps);
    position: absolute;
    top: 1px;
    left: 0;
    display: flex;
    align-items: center;
    justify-content: center;
    width: 24px;
    height: 24px;
    border-radius: 50%;
    font-size: 0.8rem;
    font-weight: 700;
    color: var(--accent);
    background: var(--accent-light);
  }

  /* ----------------------------------------------------- Category label */
  /* Small uppercase group label; no top margin when it is the first child */
  .category-label {
    margin-top: 24px;
    margin-bottom: 10px;
    font-size: 0.75rem;
    font-weight: 700;
    letter-spacing: 0.06em;
    text-transform: uppercase;
    color: var(--text-muted);
  }
  .category-label:first-child {
    margin-top: 0;
  }

  /* ------------------------------------------------------------- Mermaid */
  /* Card-style frame around each Mermaid diagram, with the content centred */
  .mermaid {
    margin-bottom: 16px;
    padding: 24px;
    border: 1px solid var(--border);
    border-radius: var(--radius-lg);
    text-align: center;
    background: var(--surface);
  }
</style>
<script type="module">
  // Load Mermaid (major version 11) as an ES module from the jsDelivr CDN.
  import mermaid from 'https://cdn.jsdelivr.net/npm/mermaid@11/dist/mermaid.esm.min.mjs';

  // Diagram palette; the values mirror the page's CSS design tokens
  // (--accent-light, --text, --accent-mid, --text-muted, --surface-alt, --blue-light).
  const themeVariables = {
    primaryColor: '#ede9fe',
    primaryTextColor: '#1a1e27',
    primaryBorderColor: '#7c3aed',
    lineColor: '#5a6478',
    secondaryColor: '#f0f2f5',
    tertiaryColor: '#dbeafe'
  };

  // startOnLoad asks Mermaid to render the page's .mermaid elements itself.
  mermaid.initialize({ startOnLoad: true, theme: 'default', themeVariables });
</script>
</head>
<body>

<!-- ============================== NAV ============================== -->
<nav>
  <a href="#" class="nav-brand">RLM Engine</a>
  <a href="#overview">Overview</a>
  <a href="#architecture">Architecture</a>
  <a href="#otp">OTP Tree</a>
  <a href="#engine">Engine</a>
  <a href="#sessions">Sessions</a>
  <a href="#tools">Tools</a>
  <a href="#telemetry">Telemetry</a>
  <a href="#tracing">Tracing</a>
  <a href="#dashboard">Dashboard</a>
  <a href="#config">Config</a>
  <a href="#modules">Modules</a>
  <a href="#quickstart">Quick Start</a>
  <a href="#decisions">Decisions</a>
  <a href="#limitations">Limitations</a>
</nav>

<!-- ============================== HEADER ============================== -->
<header class="page-header">
  <h1>RLM Engine &mdash; Reference Guide</h1>
  <p class="subtitle">
    A unified Elixir engine for recursive language model computation.
    One-shot queries and interactive sessions powered by OTP,
    with a LiveView trace dashboard for observability.
  </p>
</header>

<div class="content">

<!-- ============================== 1. OVERVIEW ============================== -->
<section id="overview">
<h2>1. Executive Summary</h2>

<div class="card">
<p>The <strong>Recursive Language Model (RLM)</strong> engine is an Elixir application that lets an LLM write and execute Elixir code in an iterative REPL loop. The engine operates in two modes from a single unified codebase:</p>

<ul>
  <li><strong>One-shot</strong> &mdash; <code>RLM.run/3</code> processes input data through iterative LLM+eval cycles, returning a final answer and run ID</li>
  <li><strong>Interactive</strong> &mdash; <code>RLM.start_session/1</code> + <code>RLM.send_message/3</code> provides a persistent conversational session with binding persistence across turns</li>
</ul>

<p>Both modes share the same <code>RLM.Worker</code> GenServer, the same iterate loop, the same tool system, and the same telemetry/tracing infrastructure. The architecture uses an <strong>async-eval pattern</strong> to prevent deadlocks when eval'd code needs to spawn recursive sub-LLM calls.</p>
</div>

<div class="card-grid">
  <div class="card-sm">
    <h4>Project Structure</h4>
    <p><code>lib/rlm/</code> &mdash; core engine (no web framework)<br>
       <code>lib/rlm_web/</code> &mdash; Phoenix LiveView dashboard (read-only)</p>
  </div>
  <div class="card-sm">
    <h4>Three Invariants</h4>
    <p>Raw input never enters the LLM context. Sub-LLM outputs stay in variables. Stdout is truncated with a head+tail strategy.</p>
  </div>
  <div class="card-sm">
    <h4>Tool Approach</h4>
    <p>Tools are sandbox functions called from eval'd code &mdash; not the Anthropic <code>tool_use</code> protocol. The LLM writes Elixir code that calls tool functions directly.</p>
  </div>
  <div class="card-sm">
    <h4>LLM Client</h4>
    <p>Anthropic Messages API via <code>Req</code>. Default models: Sonnet 4.6 (large), Haiku 4.5 (small). Requires <code>CLAUDE_API_KEY</code> env var.</p>
  </div>
</div>
</section>

<!-- ============================== 2. ARCHITECTURE ============================== -->
<section id="architecture">
<h2>2. Architecture Overview</h2>

<h3>File Structure</h3>
<div class="diagram">rlm/
&#x251C;&#x2500;&#x2500; lib/
&#x2502;   &#x251C;&#x2500;&#x2500; rlm.ex                      # Public API: run/3, start_session/1, send_message/3
&#x2502;   &#x251C;&#x2500;&#x2500; rlm/                        # Core engine
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; application.ex          # Unified OTP application (core + web)
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; worker.ex               # GenServer: iterate loop + keep_alive mode
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; run.ex                  # Per-run coordinator GenServer
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; eval.ex                 # Sandboxed Code.eval_string
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; sandbox.ex              # Functions injected into eval'd code
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; llm.ex                  # Anthropic Messages API client
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; prompt.ex               # System prompt + message formatting
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; config.ex               # Config struct + loader
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; span.ex                 # Span/run ID generation
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; truncate.ex             # Head+tail string truncation
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; helpers.ex              # chunks/2, grep/2, preview/2, list_bindings/1
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; iex.ex                  # IEx convenience helpers
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; node.ex                 # Distributed Erlang: start/1, info/0, rpc/5
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; event_log.ex            # Per-run trace Agent
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; event_log_sweeper.ex    # Periodic EventLog GC
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; trace_store.ex          # :dets persistence GenServer
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; tool.ex                 # Tool behaviour
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; tool_registry.ex        # Tool dispatch + discovery
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; telemetry/              # Telemetry events + handlers
&#x2502;   &#x2502;   &#x2514;&#x2500;&#x2500; tools/                  # 7 filesystem tools
&#x2502;   &#x251C;&#x2500;&#x2500; rlm_web.ex                  # Phoenix web module
&#x2502;   &#x251C;&#x2500;&#x2500; rlm_web/                    # Phoenix LiveView dashboard
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; endpoint.ex
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; router.ex
&#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; live/
&#x2502;   &#x2502;   &#x2502;   &#x251C;&#x2500;&#x2500; run_list_live.ex    # GET /
&#x2502;   &#x2502;   &#x2502;   &#x2514;&#x2500;&#x2500; run_detail_live.ex  # GET /runs/:run_id
&#x2502;   &#x2502;   &#x2514;&#x2500;&#x2500; components/
&#x2502;   &#x2502;       &#x251C;&#x2500;&#x2500; core_components.ex
&#x2502;   &#x2502;       &#x2514;&#x2500;&#x2500; trace_components.ex
&#x2502;   &#x2514;&#x2500;&#x2500; mix/tasks/                  # mix rlm.smoke, mix rlm.examples
&#x251C;&#x2500;&#x2500; test/
&#x251C;&#x2500;&#x2500; docs/
&#x2502;   &#x2514;&#x2500;&#x2500; GUIDE.html                   # Architecture &amp; reference guide
&#x251C;&#x2500;&#x2500; examples/
&#x2502;   &#x251C;&#x2500;&#x2500; map_reduce_analysis.exs      # Map-Reduce text analysis example
&#x2502;   &#x251C;&#x2500;&#x2500; code_review.exs              # Recursive code review example
&#x2502;   &#x251C;&#x2500;&#x2500; research_synthesis.exs       # Multi-source research synthesis example
&#x2502;   &#x251C;&#x2500;&#x2500; web_fetch.exs                # Web fetch via curl/jq example
&#x2502;   &#x2514;&#x2500;&#x2500; smoke_test.exs               # Live API smoke tests
&#x251C;&#x2500;&#x2500; config/
&#x2502;   &#x251C;&#x2500;&#x2500; config.exs   &#x251C;&#x2500;&#x2500; dev.exs
&#x2502;   &#x251C;&#x2500;&#x2500; test.exs     &#x2514;&#x2500;&#x2500; runtime.exs
&#x2514;&#x2500;&#x2500; mix.exs</div>

<h3>App Responsibilities</h3>
<table>
  <tr><th>App</th><th>Role</th><th>Dependencies</th></tr>
  <tr><td><code>rlm</code></td><td>Core engine: Worker, Eval, LLM, Tools, Telemetry, Tracing</td><td>Req, Jason, Telemetry</td></tr>
  <tr><td><code>rlm_web</code></td><td>Read-only LiveView dashboard for run visualization</td><td>Phoenix, LiveView, rlm (for PubSub + TraceStore)</td></tr>
</table>
</section>

<!-- ============================== 3. OTP TREE ============================== -->
<section id="otp">
<h2>3. OTP Supervision Tree</h2>

<div class="mermaid">
graph TD
  SUP["RLM.Supervisor<br/><small>one_for_one</small>"]

  SUP --> REG["RLM.Registry<br/><small>named process lookup</small>"]
  SUP --> PS["RLM.PubSub<br/><small>event broadcasting</small>"]
  SUP --> TS["RLM.TaskSupervisor<br/><small>bash tool tasks</small>"]
  SUP --> RUNSUP["RLM.RunSup<br/><small>DynamicSupervisor</small>"]
  SUP --> ES["RLM.EventStore<br/><small>EventLog Agents</small>"]
  SUP --> TEL["RLM.Telemetry<br/><small>handler attachment</small>"]
  SUP --> TRACE["RLM.TraceStore<br/><small>:dets persistence</small>"]
  SUP --> SWEEP["EventLog.Sweeper<br/><small>periodic GC</small>"]
  SUP --> WEBTEL["RLMWeb.Telemetry<br/><small>metrics supervisor</small>"]
  SUP --> DNS["DNSCluster<br/><small>cluster discovery</small>"]
  SUP --> EP["RLMWeb.Endpoint<br/><small>Phoenix web server</small>"]

  RUNSUP --> RUN["RLM.Run<br/><small>per-run GenServer, :temporary</small>"]
  RUN --> WDYN["DynamicSupervisor<br/><small>Worker GenServers</small>"]
  RUN --> EVALSUP["Task.Supervisor<br/><small>eval processes</small>"]
  WDYN --> W1["RLM.Worker<br/><small>depth 0</small>"]
  WDYN --> W2["RLM.Worker<br/><small>depth 1</small>"]
  EVALSUP --> ET1["eval task"]
  EVALSUP --> ET2["eval task"]

  style SUP fill:#ede9fe,stroke:#7c3aed,stroke-width:2px
  style RUNSUP fill:#dbeafe,stroke:#1d4ed8,stroke-width:2px
  style RUN fill:#dcfce7,stroke:#15803d,stroke-width:2px
  style WDYN fill:#fef3c7,stroke:#b45309
  style EVALSUP fill:#fef3c7,stroke:#b45309
</div>

<table>
  <tr><th>Child</th><th>OTP Type</th><th>Restart</th><th>Role</th></tr>
  <tr><td><code>RLM.Registry</code></td><td>Registry</td><td>:permanent</td><td>Named process lookup for Workers and EventLog agents via <code>{:via, Registry, ...}</code></td></tr>
  <tr><td><code>RLM.PubSub</code></td><td>Phoenix.PubSub</td><td>:permanent</td><td>Broadcasting telemetry events to LiveView and IEx watchers</td></tr>
  <tr><td><code>RLM.TaskSupervisor</code></td><td>Task.Supervisor</td><td>:permanent</td><td>Supervises bash tool tasks with timeout enforcement</td></tr>
  <tr><td><code>RLM.RunSup</code></td><td>DynamicSupervisor</td><td>:permanent</td><td>Hosts <code>RLM.Run</code> coordinators; each Run owns a DynamicSupervisor for Workers and a Task.Supervisor for eval tasks</td></tr>
  <tr><td><code>RLM.EventStore</code></td><td>DynamicSupervisor</td><td>:permanent</td><td>Hosts per-run EventLog Agents for in-memory trace storage</td></tr>
  <tr><td><code>RLM.Telemetry</code></td><td>GenServer</td><td>:permanent</td><td>Attaches all <code>:telemetry</code> handlers on init (Logger, EventLog, PubSub)</td></tr>
  <tr><td><code>RLM.TraceStore</code></td><td>GenServer</td><td>:permanent</td><td>Owns <code>:dets</code> table for persistent trace storage across restarts</td></tr>
  <tr><td><code>RLM.EventLog.Sweeper</code></td><td>GenServer</td><td>:permanent</td><td>Periodically GCs stale EventLog agents and old :dets records</td></tr>
  <tr><td><code>RLMWeb.Telemetry</code></td><td>Supervisor</td><td>:permanent</td><td>Phoenix telemetry metrics poller</td></tr>
  <tr><td><code>DNSCluster</code></td><td>GenServer</td><td>:permanent</td><td>DNS-based cluster discovery for distributed deployments</td></tr>
  <tr><td><code>RLMWeb.Endpoint</code></td><td>Supervisor</td><td>:permanent</td><td>Phoenix web server (Bandit) serving the LiveView dashboard</td></tr>
</table>

<div class="callout">
  <strong>Key design choice:</strong> Workers use <code>restart: :temporary</code> &mdash; they terminate normally after completion and are never restarted. The top-level <code>one_for_one</code> strategy means each infrastructure child (Registry, PubSub, TraceStore) is restarted on its own if it crashes, without taking its siblings down.
</div>
</section>

<!-- ============================== 4. ENGINE ============================== -->
<section id="engine">
<h2>4. RLM Engine Deep-Dive</h2>

<h3>The Iterate Loop</h3>
<p>Each Worker runs a loop: call the LLM, extract code, eval it, check for <code>final_answer</code>, repeat. Here's the 10-step walkthrough:</p>

<ol class="step-list">
  <li><strong>Check iteration limit</strong> &mdash; If <code>iteration &gt;= max_iterations</code> (default: 25), complete with an error.</li>
  <li><strong>Context compaction</strong> &mdash; <code>maybe_compact/1</code> estimates token count. If above 80% of the model's context window, serializes history into the <code>compacted_history</code> binding and resets to a two-message window.</li>
  <li><strong>Call LLM (synchronous)</strong> &mdash; <code>llm_module.chat(history, model, config, opts)</code> sends the full message history to the Anthropic API with structured output (<code>output_config</code>). Returns <code>{:ok, response, usage}</code> with token counts.</li>
  <li><strong>Parse structured response</strong> &mdash; <code>RLM.LLM.extract_structured/1</code> decodes the JSON response into <code>%{reasoning: ..., code: ...}</code>. If the code field is empty, sends feedback and loops.</li>
  <li><strong>Spawn async eval</strong> &mdash; <code>start_async_eval/7</code> spawns a new process that runs <code>RLM.Eval.run/3</code>. The Worker stores eval context (code, timing, usage) and stays free to handle messages.</li>
  <li><strong>Eval runs</strong> &mdash; The spawned process captures stdout via <code>StringIO</code>, injects sandbox imports, and calls <code>Code.eval_string/3</code> with the current bindings.</li>
  <li><strong>Handle subcalls / direct queries</strong> &mdash; If eval'd code calls <code>lm_query(text)</code>, it issues <code>GenServer.call(worker_pid, {:spawn_subcall, ...})</code>. The Worker delegates to <code>RLM.Run.start_worker/2</code>, which spawns a child Worker under the run's DynamicSupervisor, and stores the caller's <code>from</code> in <code>pending_subcalls</code>. If <code>lm_query</code> is called with a <code>schema:</code> option, the Worker instead handles a <code>{:direct_query, ...}</code> request &mdash; a single LLM call with constrained output that returns a parsed JSON map (no child Worker, no iterate loop).</li>
  <li><strong>Receive eval result</strong> &mdash; The eval Task (started via <code>Task.Supervisor.async_nolink</code>) delivers <code>{ref, result}</code> to the Worker when it finishes. On success: update bindings and append assistant+feedback messages to history. On crash: <code>{:DOWN, ref, ...}</code> arrives instead, and the Worker sends error feedback and continues.</li>
  <li><strong>Repetition detection</strong> &mdash; <code>maybe_nudge/1</code> checks if the last 3 code blocks are &gt;85% similar (Jaccard). If so, injects a nudge message.</li>
  <li><strong>Check final_answer</strong> &mdash; If the <code>final_answer</code> binding is non-nil, call <code>complete/2</code>. Otherwise, send <code>:iterate</code> to self and loop.</li>
</ol>

<h3>The Async-Eval Pattern (Critical)</h3>

<div class="callout">
  <strong>Why async?</strong> The Worker must remain responsive to <code>{:spawn_subcall, ...}</code> calls from eval'd code. If eval ran synchronously, the Worker's mailbox would be blocked, and <code>GenServer.call</code> from within eval would deadlock.
</div>

<div class="mermaid">
sequenceDiagram
    participant W as Worker GenServer
    participant E as Eval Task
    participant R as RLM.Run
    participant C as Child Worker

    W->>W: handle_info(:iterate)
    W->>W: 1. Call LLM (sync)
    W->>W: 2. Extract code
    W->>E: 3. Task.Supervisor.async_nolink(eval_sup)
    Note over W: 4. Store eval_context + task_ref
    Note over W: 5. {:noreply, state}
    Note over W: Worker mailbox FREE

    E->>E: Code.eval_string(code, bindings)
    E->>W: lm_query() via GenServer.call(:spawn_subcall)
    activate W
    W->>R: 6a. Run.start_worker(run_pid)
    R->>C: spawn + monitor child Worker
    R-->>W: {:ok, child_pid}
    Note over W: 7a. Store from in pending_subcalls
    deactivate W
    Note over E: ...blocked waiting for reply...

    C->>C: iterate loop...
    C->>W: {:rlm_result, span_id, result}
    W->>R: cast :worker_done
    W->>E: GenServer.reply(from, result)
    Note over E: lm_query() returns

    E->>E: eval continues...
    E->>W: {ref, eval_result}
    W->>W: 9. Update bindings
    W->>W: 10. Check final_answer
    Note over W: loop or complete

    rect rgb(254, 226, 226)
    Note over E,W: Error path: eval Task crashes
    E--xW: {:DOWN, ref, :process, pid, reason}
    W->>W: Send error feedback, continue
    end
</div>

<h3>State Machine</h3>

<div class="card-grid">
<div class="card-sm">
<h4>One-shot mode</h4>
<div class="mermaid">
stateDiagram-v2
    [*] --> running: RLM.run/3
    running --> ok: final_answer set
    running --> error: timeout / crash
    ok --> [*]
    error --> [*]
</div>
</div>
<div class="card-sm">
<h4>Keep-alive mode</h4>
<div class="mermaid">
stateDiagram-v2
    [*] --> idle: start_session/1
    idle --> running: send_message/3
    running --> idle: final_answer set
</div>
</div>
</div>

<h3>Context Compaction</h3>
<p>When estimated token count exceeds 80% of the context window, the Worker serializes the entire message history (minus system message) into a string, stores it in the <code>compacted_history</code> binding, and resets to a two-message window: the system message plus a compaction addendum that includes a truncated preview. All bindings are preserved &mdash; only the conversation history is compressed.</p>

<h3>Repetition Detection</h3>
<p>The Worker tracks the last 3 code blocks. If Jaccard similarity exceeds 0.85 between consecutive pairs, a nudge message is injected suggesting the LLM try a different approach or set <code>final_answer</code>.</p>
</section>

<!-- ============================== 5. SESSIONS ============================== -->
<section id="sessions">
<h2>5. Interactive Sessions</h2>

<h3>API</h3>
<table>
  <tr><th>Function</th><th>Returns</th><th>Description</th></tr>
  <tr><td><code>RLM.start_session/1</code></td><td><code>{:ok, session_id}</code></td><td>Start a keep-alive Worker in <code>:idle</code> state</td></tr>
  <tr><td><code>RLM.send_message/3</code></td><td><code>{:ok, answer}</code></td><td>Send text to session, blocks until answer (GenServer.call)</td></tr>
  <tr><td><code>RLM.history/1</code></td><td><code>{:ok, [map()]}</code></td><td>Get full message history for a session</td></tr>
  <tr><td><code>RLM.status/1</code></td><td><code>{:ok, map()}</code></td><td>Get session metadata (status, iteration count, cwd, etc.)</td></tr>
</table>

<h3>How It Works</h3>
<ul>
  <li><code>start_session/1</code> starts a Worker with <code>keep_alive: true</code>, which initializes in <code>:idle</code> state with only a system message in history.</li>
  <li><code>send_message/3</code> appends a user message, resets <code>final_answer</code> to nil, transitions to <code>:running</code>, and triggers the iterate loop.</li>
  <li>When <code>final_answer</code> is set, the Worker replies to the caller, emits <code>[:rlm, :turn, :complete]</code>, and returns to <code>:idle</code>.</li>
  <li><strong>Binding persistence:</strong> All variables (including <code>compacted_history</code>) survive across turns. Only <code>final_answer</code>, iteration count, and <code>prev_codes</code> reset per turn.</li>
</ul>

<h3>IEx Helpers</h3>
<p><code>RLM.IEx</code> provides convenience wrappers for interactive use from the Elixir shell:</p>

<pre><code>iex&gt; import RLM.IEx

iex&gt; session = start()
Session started: span-abc123

iex&gt; chat(session, "List files in the current directory")
[You] List files in the current directory
[RLM] ...

iex&gt; {session, _} = start_chat("Read the README and summarize it")

iex&gt; watch(session)        # Subscribe to live telemetry events
iex&gt; history(session)      # Print full message history
iex&gt; status(session)       # Print session stats</code></pre>

<table>
  <tr><th>Helper</th><th>Description</th></tr>
  <tr><td><code>start/1</code></td><td>Start a session, print ID, return it</td></tr>
  <tr><td><code>chat/2-3</code></td><td>Send message, print response, return <code>{session_id, response}</code></td></tr>
  <tr><td><code>start_chat/2</code></td><td>Start + first message in one call</td></tr>
  <tr><td><code>watch/2</code></td><td>Subscribe to PubSub events, print iterations live until turn completes</td></tr>
  <tr><td><code>history/1</code></td><td>Print full message history</td></tr>
  <tr><td><code>status/1</code></td><td>Print session metadata</td></tr>
  <tr><td><code>remote/3</code></td><td>Run an RLM query on a remote node via <code>RLM.Node.rpc</code></td></tr>
  <tr><td><code>node_info/0</code></td><td>Print distribution status via <code>RLM.Node.info</code></td></tr>
</table>
</section>

<!-- ============================== 6. TOOLS ============================== -->
<section id="tools">
<h2>6. Tool System</h2>

<h3>Tool Behaviour</h3>
<p><code>RLM.Tool</code> defines a simple behaviour with three callbacks:</p>
<pre><code>@callback name() :: String.t()
@callback description() :: String.t()
@callback execute(map()) :: {:ok, String.t()} | {:error, String.t()}</code></pre>

<p>Modules implement the behaviour via <code>use RLM.Tool</code>. Tools are stateless functions that take a params map and return a string result.</p>

<h3>Tool Registry</h3>
<p><code>RLM.ToolRegistry</code> holds a compile-time list of all 7 tool modules and provides dispatch:</p>
<table>
  <tr><th>Function</th><th>Returns</th><th>Description</th></tr>
  <tr><td><code>all/0</code></td><td><code>[module()]</code></td><td>All registered tool modules</td></tr>
  <tr><td><code>names/0</code></td><td><code>[String.t()]</code></td><td>Tool name strings</td></tr>
  <tr><td><code>descriptions/0</code></td><td><code>[{name, desc}]</code></td><td>All tool descriptions</td></tr>
  <tr><td><code>execute/2</code></td><td><code>{:ok, s} | {:error, s}</code></td><td>Dispatch by name</td></tr>
  <tr><td><code>description_for/1</code></td><td><code>{:ok, desc}</code></td><td>Lookup single tool description</td></tr>
</table>

<h3>The 7 Tools</h3>
<table>
  <tr><th>Module</th><th>Name</th><th>Description</th></tr>
  <tr><td><code>RLM.Tools.ReadFile</code></td><td>read_file</td><td>Read file contents (up to 100 KB)</td></tr>
  <tr><td><code>RLM.Tools.WriteFile</code></td><td>write_file</td><td>Write or overwrite a file (creates parent dirs)</td></tr>
  <tr><td><code>RLM.Tools.EditFile</code></td><td>edit_file</td><td>Exact-string replacement (uniqueness-guarded)</td></tr>
  <tr><td><code>RLM.Tools.Bash</code></td><td>bash</td><td>Run shell commands, with a timeout enforced via <code>Task.yield</code></td></tr>
  <tr><td><code>RLM.Tools.Grep</code></td><td>grep</td><td>ripgrep search with glob filtering</td></tr>
  <tr><td><code>RLM.Tools.Glob</code></td><td>glob</td><td>Find files by pattern</td></tr>
  <tr><td><code>RLM.Tools.Ls</code></td><td>ls</td><td>List directory contents with sizes</td></tr>
</table>

<h3>Sandbox Wrappers</h3>
<p><code>RLM.Sandbox</code> exposes all tools as functions available inside eval'd code. Paths resolve relative to the session's working directory (<code>cwd</code> injected via <code>Process.put(:rlm_cwd, ...)</code>).</p>

<table>
  <tr><th>Sandbox Function</th><th>Delegates To</th></tr>
  <tr><td><code>read_file(path)</code></td><td><code>RLM.Tools.ReadFile.execute/1</code></td></tr>
  <tr><td><code>write_file(path, content)</code></td><td><code>RLM.Tools.WriteFile.execute/1</code></td></tr>
  <tr><td><code>edit_file(path, old, new)</code></td><td><code>RLM.Tools.EditFile.execute/1</code></td></tr>
  <tr><td><code>bash(command)</code> / <code>bash(command, opts)</code></td><td><code>RLM.Tools.Bash.execute/1</code></td></tr>
  <tr><td><code>rg(pattern)</code> / <code>rg(pattern, path, opts)</code></td><td><code>RLM.Tools.Grep.execute/1</code></td></tr>
  <tr><td><code>find_files(pattern)</code> / <code>find_files(pattern, base)</code></td><td><code>RLM.Tools.Glob.execute/1</code></td></tr>
  <tr><td><code>ls()</code> / <code>ls(path)</code></td><td><code>RLM.Tools.Ls.execute/1</code></td></tr>
  <tr><td><code>list_tools()</code></td><td><code>RLM.ToolRegistry.descriptions/0</code></td></tr>
  <tr><td><code>tool_help(name)</code></td><td><code>RLM.ToolRegistry.description_for/1</code></td></tr>
</table>

<p>Additionally, <code>RLM.Sandbox</code> provides the data-processing helpers (<code>chunks/2</code>, <code>grep/2</code>, <code>preview/2</code>, <code>list_bindings/0</code>) and the LLM sub-call functions (<code>lm_query/1-2</code>, <code>parallel_query/1-2</code>). When <code>lm_query</code> is called with a <code>schema:</code> option, it takes a direct-query path: a single LLM call with constrained JSON output, returning a parsed map instead of a string. Both <code>lm_query</code> and <code>parallel_query</code> support the <code>schema:</code> option.</p>

<h3>Process Dictionary Injection</h3>
<p>When <code>RLM.Eval.run/3</code> spawns the eval process, it stores four keys in the process dictionary for sandbox functions to access:</p>

<table>
  <tr><th>Key</th><th>Purpose</th><th>Read By</th></tr>
  <tr><td><code>:rlm_worker_pid</code></td><td>PID of the parent Worker GenServer</td><td><code>lm_query/2</code>, <code>parallel_query/2</code> &mdash; target of the <code>GenServer.call</code> made for subcalls and direct queries</td></tr>
  <tr><td><code>:rlm_bindings_info</code></td><td>Pre-formatted bindings summary</td><td><code>list_bindings/0</code> &mdash; returns current bindings metadata</td></tr>
  <tr><td><code>:rlm_cwd</code></td><td>Session working directory</td><td>All filesystem tool wrappers &mdash; resolve relative paths</td></tr>
  <tr><td><code>:rlm_subcall_timeout</code></td><td>Bounded timeout for subcalls (default: 600s)</td><td><code>lm_query/2</code>, <code>parallel_query/2</code> &mdash; passed to <code>GenServer.call</code> and <code>Task.await_many</code></td></tr>
</table>
</section>

<!-- ============================== 7. TELEMETRY ============================== -->
<section id="telemetry">
<h2>7. Telemetry &amp; Observability</h2>

<h3>17 Telemetry Events</h3>
<table>
  <tr><th>Event</th><th>Measurements</th><th>Key Metadata</th></tr>
  <tr><td><code>[:rlm, :node, :start]</code></td><td>&mdash;</td><td>span_id, depth, model, context_bytes</td></tr>
  <tr><td><code>[:rlm, :node, :stop]</code></td><td>duration_ms, total_iterations</td><td>span_id, status, result_preview</td></tr>
  <tr><td><code>[:rlm, :node, :exception]</code></td><td>&mdash;</td><td>(defined but not currently emitted)</td></tr>
  <tr><td><code>[:rlm, :iteration, :start]</code></td><td>&mdash;</td><td>span_id, iteration</td></tr>
  <tr><td><code>[:rlm, :iteration, :stop]</code></td><td>duration_ms</td><td>iteration, code, stdout_preview, eval_status, bindings_snapshot, llm tokens</td></tr>
  <tr><td><code>[:rlm, :llm, :request, :start]</code></td><td>&mdash;</td><td>messages_count</td></tr>
  <tr><td><code>[:rlm, :llm, :request, :stop]</code></td><td>duration_ms, prompt/completion/total_tokens</td><td>response_preview, code_present, reasoning_preview</td></tr>
  <tr><td><code>[:rlm, :llm, :request, :exception]</code></td><td>duration_ms</td><td>error</td></tr>
  <tr><td><code>[:rlm, :eval, :start]</code></td><td>&mdash;</td><td>code, iteration</td></tr>
  <tr><td><code>[:rlm, :eval, :stop]</code></td><td>duration_ms</td><td>status (:ok/:error), stdout_bytes</td></tr>
  <tr><td><code>[:rlm, :eval, :exception]</code></td><td>&mdash;</td><td>(defined but not currently emitted)</td></tr>
  <tr><td><code>[:rlm, :subcall, :spawn]</code></td><td>&mdash;</td><td>child_span_id, child_depth, context_bytes, model_size</td></tr>
  <tr><td><code>[:rlm, :subcall, :result]</code></td><td>duration_ms</td><td>child_span_id, status, result_preview</td></tr>
  <tr><td><code>[:rlm, :direct_query, :start]</code></td><td>&mdash;</td><td>query_id, model_size, text_bytes</td></tr>
  <tr><td><code>[:rlm, :direct_query, :stop]</code></td><td>&mdash;</td><td>query_id, status, result_preview</td></tr>
  <tr><td><code>[:rlm, :compaction, :run]</code></td><td>before_tokens, after_tokens</td><td>history_bytes_compacted</td></tr>
  <tr><td><code>[:rlm, :turn, :complete]</code></td><td>duration_ms, total_iterations</td><td>span_id, status, result_preview (keep-alive only)</td></tr>
</table>

<h3>Three Handlers</h3>
<table>
  <tr><th>Handler</th><th>Attachment ID</th><th>Purpose</th></tr>
  <tr><td><code>RLM.Telemetry.Logger</code></td><td><code>"rlm-logger"</code></td><td>Structured <code>Logger</code> output for node lifecycle, iterations, LLM calls</td></tr>
  <tr><td><code>RLM.Telemetry.EventLogHandler</code></td><td><code>"rlm-event-log"</code></td><td>Writes events to both in-memory EventLog Agent and persistent TraceStore</td></tr>
  <tr><td><code>RLM.Telemetry.PubSub</code></td><td><code>"rlm-pubsub"</code></td><td>Broadcasts to <code>"rlm:runs"</code> and <code>"rlm:run:&lt;run_id&gt;"</code> PubSub topics</td></tr>
</table>

<h3>PubSub Topics</h3>
<table>
  <tr><th>Topic</th><th>Subscribers</th><th>Content</th></tr>
  <tr><td><code>"rlm:runs"</code></td><td>RunListLive, IEx watchers</td><td>All telemetry events (for new run detection)</td></tr>
  <tr><td><code>"rlm:run:&lt;run_id&gt;"</code></td><td>RunDetailLive, IEx.watch/2</td><td>Events scoped to a specific run</td></tr>
</table>

<h3>Event Flow</h3>
<div class="mermaid">
flowchart LR
    W["Worker<br/>:telemetry.execute"] --> L["Logger handler"]
    W --> ELH["EventLogHandler"]
    W --> PSH["PubSub handler"]

    L --> LOG["Elixir Logger"]
    ELH --> EA["EventLog Agent<br/><small>in-memory tree</small>"]
    ELH --> TS["TraceStore<br/><small>:dets persistence</small>"]
    PSH --> T1["rlm:runs"]
    PSH --> T2["rlm:run:&lt;run_id&gt;"]

    style W fill:#ede9fe,stroke:#7c3aed,stroke-width:2px
    style EA fill:#dcfce7,stroke:#15803d
    style TS fill:#dbeafe,stroke:#1d4ed8
    style T1 fill:#fef3c7,stroke:#b45309
    style T2 fill:#fef3c7,stroke:#b45309
</div>
</section>

<!-- ============================== 8. TRACING ============================== -->
<section id="tracing">
<h2>8. Tracing &amp; Persistence</h2>

<h3>Dual-Write Pattern</h3>
<p>Every telemetry event is written to <strong>two stores</strong> simultaneously by <code>RLM.Telemetry.EventLogHandler</code>:</p>

<div class="card-grid">
  <div class="card-sm">
    <h4>EventLog Agent (hot path)</h4>
    <p>In-memory <code>Agent</code> per run. Builds a span tree incrementally. Used by RunDetailLive for live updates. Swept after TTL (default: 1 hour).</p>
  </div>
  <div class="card-sm">
    <h4>TraceStore :dets (cold path)</h4>
    <p>Persistent <code>:dets</code> <code>:bag</code> table at <code>priv/traces.dets</code>. Survives restarts. Used by RunListLive for historical runs and RunDetailLive as fallback.</p>
  </div>
</div>

<h3>EventLog Agent Lifecycle</h3>
<ol>
  <li><strong>Created on demand:</strong> When a <code>:node_start</code> event arrives, <code>EventLogHandler.ensure_event_log/1</code> starts an Agent under <code>RLM.EventStore</code> (DynamicSupervisor) if one doesn't exist for that run.</li>
  <li><strong>Accumulates events:</strong> <code>EventLog.append/2</code> adds events and incrementally builds a span tree (<code>update_tree/2</code>).</li>
  <li><strong>Queried by dashboard:</strong> <code>EventLog.get_tree/1</code> returns the current span tree; <code>get_events/1</code> returns chronological event list.</li>
  <li><strong>Swept by Sweeper:</strong> After the TTL elapses, <code>EventLog.Sweeper</code> terminates the Agent via <code>DynamicSupervisor.terminate_child/2</code>.</li>
</ol>

<h3>EventLog.Sweeper</h3>
<p>A GenServer that runs on a timer (default: every 5 minutes). It:</p>
<ul>
  <li>Scans all children of <code>RLM.EventStore</code></li>
  <li>Terminates agents whose age, measured from their <code>started_at</code> monotonic timestamp, exceeds the TTL (default: 1 hour)</li>
  <li>Calls <code>RLM.TraceStore.delete_older_than/1</code> with a wall-clock cutoff to garbage-collect old :dets records too</li>
  <li>Uses separate monotonic and wall-clock cutoffs, because EventLog uses monotonic time while TraceStore events use wall-clock <code>System.system_time</code></li>
</ul>

<h3>TraceStore :dets Details</h3>
<table>
  <tr><th>Property</th><th>Value</th></tr>
  <tr><td>Table name</td><td><code>:rlm_traces</code></td></tr>
  <tr><td>Type</td><td><code>:bag</code> (multiple events per run_id key)</td></tr>
  <tr><td>File path</td><td><code>Application.app_dir(:rlm, "priv/traces.dets")</code></td></tr>
  <tr><td>Record format</td><td><code>{run_id, event_map}</code></td></tr>
  <tr><td>Write mode</td><td><code>GenServer.cast</code> (async, non-blocking)</td></tr>
  <tr><td>Read mode</td><td><code>GenServer.call</code> (sync)</td></tr>
  <tr><td>Cleanup</td><td><code>delete_older_than/1</code> &mdash; two-pass: fold to collect, then delete</td></tr>
</table>
</section>

<!-- ============================== 9. DASHBOARD ============================== -->
<section id="dashboard">
<h2>9. LiveView Dashboard</h2>

<p>The <code>lib/rlm_web/</code> layer provides a read-only Phoenix LiveView dashboard for visualizing RLM runs. It reuses <code>RLM.PubSub</code> for real-time updates.</p>

<h3>Routes</h3>
<table>
  <tr><th>Path</th><th>LiveView</th><th>Description</th></tr>
  <tr><td><code>GET /</code></td><td><code>RLMWeb.RunListLive</code></td><td>Table of all runs with status, iterations, duration</td></tr>
  <tr><td><code>GET /runs/:run_id</code></td><td><code>RLMWeb.RunDetailLive</code></td><td>Recursive span tree with expandable iteration cards</td></tr>
</table>

<h3>RunListLive</h3>
<ul>
  <li>On mount: loads all run summaries from <code>RLM.TraceStore.list_run_ids/0</code>, rebuilding summary info from stored events</li>
  <li>Subscribes to <code>"rlm:runs"</code> PubSub topic for live updates</li>
  <li>New rows appear within ~1 second via <code>:node_start</code> events (root spans only)</li>
  <li>Updates status and duration via <code>:node_stop</code> events</li>
  <li>Shows: run ID (truncated), relative time, status badge, iteration count, duration</li>
</ul>

<h3>RunDetailLive</h3>
<ul>
  <li>On mount: tries live EventLog Agent first (<code>Registry.lookup</code>), falls back to TraceStore for completed/swept runs</li>
  <li>Subscribes to <code>"rlm:run:&lt;run_id&gt;"</code> for scoped updates</li>
  <li>Renders a recursive span tree using <code>TraceComponents.span_node/1</code></li>
  <li>Each iteration is expandable, showing: code, stdout preview, bindings snapshot, LLM token usage</li>
  <li>Child spans are rendered recursively, color-coded by depth</li>
</ul>

<h3>TraceComponents</h3>
<table>
  <tr><th>Component</th><th>Purpose</th></tr>
  <tr><td><code>span_node/1</code></td><td>Recursive span display with status dot, model badge, depth, timing, and nested children</td></tr>
  <tr><td><code>iteration_card/1</code></td><td>Collapsible card showing code, stdout, bindings, and token counts for a single iteration</td></tr>
</table>
</section>

<!-- ============================== 10. CONFIG ============================== -->
<section id="config">
<h2>10. Configuration</h2>

<p><code>RLM.Config</code> loads defaults from the application env and allows runtime overrides via a keyword list. All fields are available on the <code>%RLM.Config{}</code> struct.</p>

<table>
  <tr><th>Field</th><th>Default</th><th>Type</th><th>Notes</th></tr>
  <tr><td><code>api_base_url</code></td><td><code>"https://api.anthropic.com"</code></td><td>string</td><td>Anthropic API base URL</td></tr>
  <tr><td><code>api_key</code></td><td><code>$CLAUDE_API_KEY</code></td><td>string</td><td>From env var at load time</td></tr>
  <tr><td><code>model_large</code></td><td><code>"claude-sonnet-4-6"</code></td><td>string</td><td>Used for parent Workers</td></tr>
  <tr><td><code>model_small</code></td><td><code>"claude-haiku-4-5"</code></td><td>string</td><td>Used for subcalls</td></tr>
  <tr><td><code>max_iterations</code></td><td><code>25</code></td><td>integer</td><td>Per-Worker LLM iteration limit (the count resets each turn)</td></tr>
  <tr><td><code>max_depth</code></td><td><code>5</code></td><td>integer</td><td>Recursive subcall depth limit</td></tr>
  <tr><td><code>max_concurrent_subcalls</code></td><td><code>10</code></td><td>integer</td><td>Parallel subcall limit per Worker</td></tr>
  <tr><td><code>context_window_tokens_large</code></td><td><code>200_000</code></td><td>integer</td><td>Token budget for compaction threshold (large model)</td></tr>
  <tr><td><code>context_window_tokens_small</code></td><td><code>200_000</code></td><td>integer</td><td>Token budget for compaction threshold (small model)</td></tr>
  <tr><td><code>truncation_head</code></td><td><code>4000</code></td><td>integer</td><td>Characters to show from start of truncated output</td></tr>
  <tr><td><code>truncation_tail</code></td><td><code>4000</code></td><td>integer</td><td>Characters to show from end of truncated output</td></tr>
  <tr><td><code>eval_timeout</code></td><td><code>300_000</code></td><td>ms</td><td>Timeout per eval execution (5 min)</td></tr>
  <tr><td><code>llm_timeout</code></td><td><code>120_000</code></td><td>ms</td><td>Timeout per LLM API request (2 min)</td></tr>
  <tr><td><code>subcall_timeout</code></td><td><code>600_000</code></td><td>ms</td><td>Timeout for subcall completion (10 min)</td></tr>
  <tr><td><code>cost_per_1k_prompt_tokens_large</code></td><td><code>0.003</code></td><td>float</td><td>For cost estimation (large model)</td></tr>
  <tr><td><code>cost_per_1k_prompt_tokens_small</code></td><td><code>0.0008</code></td><td>float</td><td>For cost estimation (small model)</td></tr>
  <tr><td><code>cost_per_1k_completion_tokens_large</code></td><td><code>0.015</code></td><td>float</td><td>For cost estimation (large model)</td></tr>
  <tr><td><code>cost_per_1k_completion_tokens_small</code></td><td><code>0.004</code></td><td>float</td><td>For cost estimation (small model)</td></tr>
  <tr><td><code>enable_otel</code></td><td><code>false</code></td><td>boolean</td><td>OpenTelemetry integration (reserved)</td></tr>
  <tr><td><code>enable_event_log</code></td><td><code>true</code></td><td>boolean</td><td>Enable EventLog trace recording</td></tr>
  <tr><td><code>event_log_capture_full_stdout</code></td><td><code>false</code></td><td>boolean</td><td>Store full stdout in event log (not just preview)</td></tr>
  <tr><td><code>llm_module</code></td><td><code>RLM.LLM</code></td><td>module</td><td>Swappable for <code>RLM.Test.MockLLM</code> in tests</td></tr>
</table>
</section>

<!-- ============================== 11. MODULE MAP ============================== -->
<section id="modules">
<h2>11. Module Map</h2>

<p class="category-label">Core Engine</p>
<div class="card-grid">
  <div class="card-sm">
    <h4>RLM</h4>
    <p>Public API: <code>run/3</code>, <code>run_async/3</code>, <code>start_session/1</code>, <code>send_message/3</code>, <code>history/1</code>, <code>status/1</code></p>
  </div>
  <div class="card-sm">
    <h4>RLM.Worker</h4>
    <p>GenServer per execution node. Iterate loop, async-eval (supervised Task), subcall handling (delegated to Run), direct query (schema mode), keep-alive mode, compaction, nudging</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Run</h4>
    <p>Per-run coordinator. Owns worker DynamicSupervisor + eval Task.Supervisor. ETS worker tree tracking, crash propagation to parent workers, cascade shutdown</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Eval</h4>
    <p>Sandboxed <code>Code.eval_string</code> with IO capture, timeout, process dictionary injection</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Sandbox</h4>
    <p>Functions injected into eval'd code: helpers, LLM sub-calls, filesystem tool wrappers</p>
  </div>
  <div class="card-sm">
    <h4>RLM.LLM</h4>
    <p>Anthropic Messages API client via Req. <code>chat/4</code> with structured output and optional <code>schema:</code> override. <code>extract_structured/1</code> for response parsing</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Config</h4>
    <p>Config struct with 22 fields. Loads from app env + keyword overrides</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Prompt</h4>
    <p>System prompt loading from <code>priv/system_prompt.md</code>. User, feedback, compaction, and nudge message builders</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Helpers</h4>
    <p><code>chunks/2</code>, <code>grep/2</code>, <code>preview/2</code>, <code>list_bindings/1</code> &mdash; data processing utilities</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Truncate</h4>
    <p>Head+tail string truncation with omission marker</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Span</h4>
    <p>Span and run ID generation via <code>:crypto.strong_rand_bytes</code></p>
  </div>
  <div class="card-sm">
    <h4>RLM.IEx</h4>
    <p>IEx convenience helpers: <code>start</code>, <code>chat</code>, <code>start_chat</code>, <code>watch</code>, <code>history</code>, <code>status</code>, <code>remote</code>, <code>node_info</code></p>
  </div>
  <div class="card-sm">
    <h4>RLM.Node</h4>
    <p>Distributed Erlang: <code>start/1</code> (configure distribution), <code>info/0</code> (cluster status), <code>rpc/5</code> (remote calls via <code>:erpc</code>)</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Node.Info</h4>
    <p>Struct holding distribution status: <code>node</code>, <code>alive</code>, <code>cookie</code>, <code>connected_nodes</code>, <code>visible_nodes</code>, <code>hidden_nodes</code></p>
  </div>
  <div class="card-sm">
    <h4>RLM.Application</h4>
    <p>OTP application module. Starts an 11-child supervision tree (8 core + 3 web)</p>
  </div>
</div>

<p class="category-label">Tools</p>
<div class="card-grid">
  <div class="card-sm">
    <h4>RLM.Tool</h4>
    <p>Behaviour defining <code>name/0</code>, <code>description/0</code>, <code>execute/1</code> callbacks</p>
  </div>
  <div class="card-sm">
    <h4>RLM.ToolRegistry</h4>
    <p>Central dispatch: <code>all/0</code>, <code>names/0</code>, <code>descriptions/0</code>, <code>execute/2</code>, <code>description_for/1</code></p>
  </div>
  <div class="card-sm">
    <h4>RLM.Tools.ReadFile</h4>
    <p>Read file contents (up to 100 KB)</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Tools.WriteFile</h4>
    <p>Write or overwrite a file, creates parent directories</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Tools.EditFile</h4>
    <p>Exact-string replacement with uniqueness guard</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Tools.Bash</h4>
    <p>Shell command execution, with a timeout enforced via <code>Task.yield</code></p>
  </div>
  <div class="card-sm">
    <h4>RLM.Tools.Grep</h4>
    <p>ripgrep search with glob filtering</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Tools.Glob</h4>
    <p>Find files by pattern</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Tools.Ls</h4>
    <p>List directory contents with sizes</p>
  </div>
</div>

<p class="category-label">Telemetry</p>
<div class="card-grid">
  <div class="card-sm">
    <h4>RLM.Telemetry</h4>
    <p>GenServer that defines 17 events and attaches all handlers on init</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Telemetry.Logger</h4>
    <p>Structured logging handler for node lifecycle, iterations, LLM calls</p>
  </div>
  <div class="card-sm">
    <h4>RLM.Telemetry.PubSub</h4>
    <p>Broadcasts events to <code>"rlm:runs"</code> and <code>"rlm:run:&lt;run_id&gt;"</code></p>
  </div>
  <div class="card-sm">
    <h4>RLM.Telemetry.EventLogHandler</h4>
    <p>Dual-write: routes events to EventLog Agent and TraceStore</p>
  </div>
</div>

<p class="category-label">Tracing &amp; Persistence</p>
<div class="card-grid">
  <div class="card-sm">
    <h4>RLM.EventLog</h4>
    <p>Per-run Agent storing events and building span tree incrementally</p>
  </div>