From f55f55b7c501a86e1e2aaa1f3d33553f516a32fc Mon Sep 17 00:00:00 2001 From: rohkymntn Date: Sat, 14 Feb 2026 13:27:59 -0800 Subject: [PATCH 1/2] Add GPT-backed agentic optimization API and frontend integration --- api/README.md | 59 +++++ api/__pycache__/agentic.cpython-311.pyc | Bin 0 -> 11722 bytes api/__pycache__/main.cpython-311.pyc | Bin 0 -> 6296 bytes api/agentic.py | 329 ++++++++++++++++++++++++ api/main.py | 130 ++++++++++ api/requirements.txt | 4 + frontend/README.md | 30 +++ frontend/app.js | 94 +++++++ frontend/index.html | 49 ++++ frontend/style.css | 233 +++++++++++++++++ 10 files changed, 928 insertions(+) create mode 100644 api/README.md create mode 100644 api/__pycache__/agentic.cpython-311.pyc create mode 100644 api/__pycache__/main.cpython-311.pyc create mode 100644 api/agentic.py create mode 100644 api/main.py create mode 100644 api/requirements.txt create mode 100644 frontend/README.md create mode 100644 frontend/app.js create mode 100644 frontend/index.html create mode 100644 frontend/style.css diff --git a/api/README.md b/api/README.md new file mode 100644 index 0000000..751e8e4 --- /dev/null +++ b/api/README.md @@ -0,0 +1,59 @@ +# FERB Agentic API + +This API now supports: + +- `POST /chat` - GPT-backed kernel optimization chat +- `POST /optimize` - iterative agent loop for solution improvement + +## Setup + +```bash +cd /Users/rohk/FERB/api +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt +export OPENAI_API_KEY="your_key_here" +uvicorn main:app --reload --host 0.0.0.0 --port 8000 +``` + +## `POST /chat` + +Request: + +```json +{ + "message": "How do I optimize problem 10 for fewer all-to-all rounds?", + "model": "gpt-4o-mini" +} +``` + +## `POST /optimize` + +Runs a generate -> (optional evaluate) -> select-best loop. + +Request: + +```json +{ + "objective": "Improve throughput while keeping correctness for problem 1 all-reduce", + "problem_id": 1, + "iterations": 3, + "model": "gpt-4o-mini", + "topology_json_path": "/Users/rohk/FERB/utils/example_topologies/nccl_topology_parsed.json", + "evaluator_command": "python /path/to/evaluator.py --candidate {candidate_path}", + "evaluator_timeout_s": 240 +} +``` + +Notes: + +- `evaluator_command` is optional. If omitted, the API still iterates and stores candidates. +- If used, your evaluator should print either: + - JSON: `{"score": 123.4}`, or + - text line: `score=123.4` +- Higher score is treated as better. 
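+
+A minimal evaluator sketch that satisfies this contract (a sketch only: the
+`solution()` call and the inverse-latency scoring are illustrative placeholders,
+not FERB's real benchmark):
+
+```python
+#!/usr/bin/env python3
+"""Toy evaluator: load a candidate module, time it, print {"score": ...}."""
+import argparse
+import importlib.util
+import json
+import time
+
+parser = argparse.ArgumentParser()
+parser.add_argument("--candidate", required=True)
+args = parser.parse_args()
+
+# The API substitutes the candidate file path for {candidate_path}.
+spec = importlib.util.spec_from_file_location("candidate", args.candidate)
+module = importlib.util.module_from_spec(spec)
+spec.loader.exec_module(module)
+
+start = time.perf_counter()
+module.solution()  # placeholder call; real problems pass problem-specific args
+elapsed = time.perf_counter() - start
+
+# Higher is treated as better, so report inverse latency.
+print(json.dumps({"score": 1.0 / max(elapsed, 1e-9)}))
+```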
+
+Outputs are saved in:
+
+- `/Users/rohk/FERB/.agent_runs/<run_id>/candidate_iter_<iteration>.py`
+
diff --git a/api/__pycache__/agentic.cpython-311.pyc b/api/__pycache__/agentic.cpython-311.pyc
new file mode 100644
index 0000000..910e83d
Binary files /dev/null and b/api/__pycache__/agentic.cpython-311.pyc differ
diff --git a/api/__pycache__/main.cpython-311.pyc b/api/__pycache__/main.cpython-311.pyc
new file mode 100644
index 0000000..62d2e78
Binary files /dev/null and b/api/__pycache__/main.cpython-311.pyc differ
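Conceptually, `api/agentic.py` below reduces to a generate -> evaluate -> select-best loop. A stripped-down sketch of that control flow (simplified names; persistence, feedback prompts, and error handling omitted):

```python
def optimize_loop(objective, iterations, generate, evaluate=None):
    """Sketch of the loop in api/agentic.py, not the real API."""
    best_code, best_score, feedback = None, None, None
    for idx in range(1, iterations + 1):
        code, feedback = generate(objective, idx, best_code, feedback)
        score = evaluate(code) if evaluate else None  # evaluator is optional
        # The first candidate is always kept; later ones must beat best_score.
        if best_code is None or (score is not None and (best_score is None or score > best_score)):
            best_code, best_score = code, score
    return best_code, best_score
```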
diff --git a/api/agentic.py b/api/agentic.py
new file mode 100644
index 0000000..95ad6c1
--- /dev/null
+++ b/api/agentic.py
@@ -0,0 +1,329 @@
+"""
+Agentic optimization loop for FERB kernel solutions.
+
+This module provides:
+- GPT chat replies with FERB context
+- Iterative candidate generation + optional evaluation
+"""
+
+from __future__ import annotations
+
+import json
+import os
+import re
+import subprocess
+import uuid
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any
+
+from openai import OpenAI
+
+
+ROOT_DIR = Path(__file__).resolve().parent.parent
+REFERENCE_DIR = ROOT_DIR / "reference"
+RUNS_DIR = ROOT_DIR / ".agent_runs"
+
+
+SYSTEM_CHAT_PROMPT = """You are an expert multi-GPU systems and CUDA engineer working on FERB.
+Focus on practical kernel optimization advice for distributed training speedups.
+Keep answers concise and action-oriented.
+"""
+
+
+SYSTEM_OPTIMIZER_PROMPT = """You are an autonomous kernel optimization agent for FERB.
+You are iteratively improving solution quality. +Always return valid Python code for a FERB solution module with a `solution(...)` function. +Do not include markdown fences. +""" + + +@dataclass +class IterationResult: + iteration: int + candidate_path: str + score: float | None + evaluator_stdout: str + evaluator_stderr: str + model_feedback: str + + +def _openai_client() -> OpenAI: + api_key = os.environ.get("OPENAI_API_KEY", "").strip() + if not api_key: + raise RuntimeError("OPENAI_API_KEY is not set") + return OpenAI(api_key=api_key) + + +def _extract_python_code(text: str) -> str: + """ + Accept raw code or fenced markdown and return Python source. + """ + src = (text or "").strip() + fence = re.search(r"```(?:python)?\s*(.*?)```", src, flags=re.DOTALL | re.IGNORECASE) + if fence: + return fence.group(1).strip() + return src + + +def _read_problem_reference(problem_id: int) -> str: + path = REFERENCE_DIR / f"{problem_id}.py" + if not path.exists(): + return f"# Missing reference file for problem {problem_id}: {path}" + return path.read_text(encoding="utf-8") + + +def _read_problem_descriptions() -> str: + path = REFERENCE_DIR / "problems.md" + if not path.exists(): + return "" + return path.read_text(encoding="utf-8") + + +def gpt_chat_reply(message: str, model: str = "gpt-4o-mini") -> str: + """ + GPT-backed chat with FERB context. + """ + client = _openai_client() + response = client.responses.create( + model=model, + input=[ + {"role": "system", "content": SYSTEM_CHAT_PROMPT}, + {"role": "user", "content": message}, + ], + ) + return (response.output_text or "").strip() + + +def _generate_candidate( + *, + model: str, + objective: str, + problem_id: int, + iteration_idx: int, + previous_best_code: str | None, + previous_feedback: str | None, + topology_json: str | None, +) -> tuple[str, str]: + """ + Returns (candidate_code, model_feedback). + """ + client = _openai_client() + reference_code = _read_problem_reference(problem_id) + problem_docs = _read_problem_descriptions() + best_code_section = previous_best_code if previous_best_code else "# none yet" + feedback_section = previous_feedback if previous_feedback else "# first iteration" + topology_section = topology_json if topology_json else "{}" + + user_prompt = f""" +Objective: +{objective} + +Problem ID: +{problem_id} + +Iteration: +{iteration_idx} + +Problem notes: +{problem_docs} + +Reference implementation: +{reference_code} + +Current best candidate: +{best_code_section} + +Feedback from previous iteration: +{feedback_section} + +Topology JSON: +{topology_section} + +Task: +1) Write an improved solution module. +2) Keep function signature compatible with reference. +3) Prioritize correctness first, then performance. +4) Return ONLY code. +""" + + response = client.responses.create( + model=model, + input=[ + {"role": "system", "content": SYSTEM_OPTIMIZER_PROMPT}, + {"role": "user", "content": user_prompt}, + ], + ) + code = _extract_python_code(response.output_text) + + feedback_prompt = f""" +You proposed this candidate. 
Give a short self-critique with: +- likely strengths +- likely risks +- what to change next iteration + +Candidate code: +{code} +""" + feedback_resp = client.responses.create( + model=model, + input=[ + {"role": "system", "content": "You are a strict code reviewer for FERB kernels."}, + {"role": "user", "content": feedback_prompt}, + ], + ) + feedback = (feedback_resp.output_text or "").strip() + return code, feedback + + +def _parse_score(stdout: str) -> float | None: + """ + Accept score in either: + - JSON object: {"score": 123.4} + - plain text line: score=123.4 + """ + txt = (stdout or "").strip() + if not txt: + return None + + lines = [line.strip() for line in txt.splitlines() if line.strip()] + if not lines: + return None + + # Try JSON from last line then full text + for candidate in (lines[-1], txt): + try: + obj = json.loads(candidate) + if isinstance(obj, dict) and "score" in obj: + return float(obj["score"]) + except Exception: + pass + + # Try score=... + for line in reversed(lines): + m = re.search(r"score\s*=\s*([0-9]+(?:\.[0-9]+)?)", line, flags=re.IGNORECASE) + if m: + return float(m.group(1)) + + return None + + +def _run_evaluator(command_template: str, candidate_path: Path, timeout_s: int) -> tuple[float | None, str, str]: + """ + Run evaluator command and parse score from stdout. + """ + command = command_template.format(candidate_path=str(candidate_path)) + proc = subprocess.run( + command, + shell=True, + cwd=str(ROOT_DIR), + capture_output=True, + text=True, + timeout=timeout_s, + ) + stdout = proc.stdout or "" + stderr = proc.stderr or "" + score = _parse_score(stdout) + return score, stdout, stderr + + +def run_agentic_optimization( + *, + objective: str, + problem_id: int, + iterations: int = 3, + model: str = "gpt-4o-mini", + topology_json_path: str | None = None, + evaluator_command: str | None = None, + evaluator_timeout_s: int = 240, +) -> dict[str, Any]: + """ + Iterative generate/evaluate/select loop. 
+ """ + if iterations < 1: + raise ValueError("iterations must be >= 1") + + topology_json = None + if topology_json_path: + topo_path = Path(topology_json_path) + if topo_path.exists(): + topology_json = topo_path.read_text(encoding="utf-8") + + run_id = uuid.uuid4().hex[:12] + run_dir = RUNS_DIR / run_id + run_dir.mkdir(parents=True, exist_ok=True) + + best_code: str | None = None + best_score: float | None = None + best_candidate_path: str | None = None + previous_feedback: str | None = None + trace: list[IterationResult] = [] + + for idx in range(1, iterations + 1): + code, model_feedback = _generate_candidate( + model=model, + objective=objective, + problem_id=problem_id, + iteration_idx=idx, + previous_best_code=best_code, + previous_feedback=previous_feedback, + topology_json=topology_json, + ) + candidate_path = run_dir / f"candidate_iter_{idx}.py" + candidate_path.write_text(code, encoding="utf-8") + + score = None + eval_stdout = "" + eval_stderr = "" + if evaluator_command: + score, eval_stdout, eval_stderr = _run_evaluator( + evaluator_command, + candidate_path, + timeout_s=evaluator_timeout_s, + ) + + choose_new_best = False + if best_code is None: + choose_new_best = True + elif score is not None and (best_score is None or score > best_score): + choose_new_best = True + + if choose_new_best: + best_code = code + best_score = score + best_candidate_path = str(candidate_path) + + previous_feedback = model_feedback + trace.append( + IterationResult( + iteration=idx, + candidate_path=str(candidate_path), + score=score, + evaluator_stdout=eval_stdout, + evaluator_stderr=eval_stderr, + model_feedback=model_feedback, + ) + ) + + return { + "run_id": run_id, + "run_dir": str(run_dir), + "problem_id": problem_id, + "objective": objective, + "iterations": iterations, + "model": model, + "best_score": best_score, + "best_candidate_path": best_candidate_path, + "best_code": best_code, + "trace": [ + { + "iteration": t.iteration, + "candidate_path": t.candidate_path, + "score": t.score, + "model_feedback": t.model_feedback, + "evaluator_stdout": t.evaluator_stdout, + "evaluator_stderr": t.evaluator_stderr, + } + for t in trace + ], + } diff --git a/api/main.py b/api/main.py new file mode 100644 index 0000000..007cbfa --- /dev/null +++ b/api/main.py @@ -0,0 +1,130 @@ +""" +FERB API: +- Chat endpoint for kernel optimization guidance +- Agentic optimization endpoint for iterative GPT-driven solution improvement +""" + +import os +from contextlib import asynccontextmanager + +from pathlib import Path + +from fastapi import FastAPI +from fastapi import HTTPException +from fastapi.middleware.cors import CORSMiddleware +from fastapi.staticfiles import StaticFiles +from pydantic import BaseModel, Field + +from agentic import gpt_chat_reply +from agentic import run_agentic_optimization + + +# --------------------------------------------------------------------------- +# API +# --------------------------------------------------------------------------- + +class ChatRequest(BaseModel): + message: str + model: str = "gpt-4o-mini" + + +class ChatResponse(BaseModel): + reply: str + ok: bool = True + + +class OptimizeRequest(BaseModel): + objective: str = Field(..., min_length=8) + problem_id: int = Field(..., ge=1) + iterations: int = Field(default=3, ge=1, le=10) + model: str = "gpt-4o-mini" + topology_json_path: str | None = None + evaluator_command: str | None = None + evaluator_timeout_s: int = Field(default=240, ge=10, le=3600) + + +class OptimizeResponse(BaseModel): + ok: bool = True + result: 
dict
+
+
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    yield
+
+
+app = FastAPI(title="FERB Kernel Optimization Chat", lifespan=lifespan)
+
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+
+
+@app.get("/health")
+def health() -> dict[str, str]:
+    return {"status": "ok"}
+
+
+@app.post("/chat", response_model=ChatResponse)
+def chat(body: ChatRequest) -> ChatResponse:
+    # GPT-backed chat; if no API key, return setup guidance.
+    if not os.environ.get("OPENAI_API_KEY", "").strip():
+        return ChatResponse(
+            reply=(
+                "OPENAI_API_KEY is not set. Export it first, then retry. "
+                "Example: `export OPENAI_API_KEY=...`"
+            ),
+            ok=False,
+        )
+    try:
+        reply = gpt_chat_reply(body.message, model=body.model)
+        return ChatResponse(reply=reply, ok=True)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"chat failed: {exc}") from exc
+
+
+@app.post("/optimize", response_model=OptimizeResponse)
+def optimize(body: OptimizeRequest) -> OptimizeResponse:
+    """
+    Agentic optimization loop:
+    - Generate candidate code with GPT
+    - Optionally evaluate the candidate via a command
+    - Keep the best candidate
+    """
+    if not os.environ.get("OPENAI_API_KEY", "").strip():
+        raise HTTPException(
+            status_code=400,
+            detail="OPENAI_API_KEY is required for /optimize",
+        )
+    try:
+        result = run_agentic_optimization(
+            objective=body.objective,
+            problem_id=body.problem_id,
+            iterations=body.iterations,
+            model=body.model,
+            topology_json_path=body.topology_json_path,
+            evaluator_command=body.evaluator_command,
+            evaluator_timeout_s=body.evaluator_timeout_s,
+        )
+        return OptimizeResponse(ok=True, result=result)
+    except Exception as exc:
+        raise HTTPException(status_code=500, detail=f"optimization failed: {exc}") from exc
+
+
+# Serve the frontend (Whitespace chatbot) when running from the repo root
+_frontend = Path(__file__).resolve().parent.parent / "frontend"
+if _frontend.is_dir():
+    app.mount("/", StaticFiles(directory=str(_frontend), html=True), name="frontend")
+
+
+# ---------------------------------------------------------------------------
+# Dev server (optional)
+# ---------------------------------------------------------------------------
+
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)
diff --git a/api/requirements.txt b/api/requirements.txt
new file mode 100644
index 0000000..3c785d6
--- /dev/null
+++ b/api/requirements.txt
@@ -0,0 +1,4 @@
+fastapi>=0.115.0
+uvicorn[standard]>=0.32.0
+pydantic>=2.0.0
+openai>=1.55.0
diff --git a/frontend/README.md b/frontend/README.md
new file mode 100644
index 0000000..23475b1
--- /dev/null
+++ b/frontend/README.md
@@ -0,0 +1,30 @@
+# FERB Chat Frontend
+
+Whitespace-style AI chatbot for kernel optimization and faster ML training.
+
+## Run everything (API + frontend)
+
+From the **project root** (FERB):
+
+```bash
+cd api && pip install -r requirements.txt && uvicorn main:app --reload --host 0.0.0.0 --port 8000
+```
+
+Then open **http://127.0.0.1:8000** in your browser. The same server serves the UI and the `/chat` API.
+
+## Run frontend only (e.g. a static server)
+
+If the API runs elsewhere, open `index.html` or serve `frontend/` with any static server and set the API base:
+
+```html
+<script>window.FERB_API_BASE = "http://127.0.0.1:8000";</script>
+<script src="app.js"></script>
+```
+
+Or run from `frontend/`:
+
+```bash
+npx serve -p 3000
+```
+
+Then set `FERB_API_BASE` to your API URL (e.g. `http://127.0.0.1:8000`) if the API is on a different port.
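For a quick smoke test of the same `/chat` endpoint from Python rather than the browser (assumes the API from the setup above is running on port 8000; `requests` is not in `api/requirements.txt` and must be installed separately):

```python
import requests  # assumed installed: pip install requests

resp = requests.post(
    "http://127.0.0.1:8000/chat",
    json={"message": "How do I reduce all-to-all rounds for problem 10?", "model": "gpt-4o-mini"},
    timeout=60,
)
resp.raise_for_status()
data = resp.json()  # shaped like ChatResponse: {"reply": str, "ok": bool}
print(data["reply"])
```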
diff --git a/frontend/app.js b/frontend/app.js
new file mode 100644
index 0000000..362892a
--- /dev/null
+++ b/frontend/app.js
@@ -0,0 +1,94 @@
+(function () {
+  const API_BASE = window.FERB_API_BASE || "";
+
+  const form = document.getElementById("chatForm");
+  const input = document.getElementById("input");
+  const submitBtn = document.getElementById("submit");
+  const messagesEl = document.getElementById("messages");
+
+  function escapeHtml(text) {
+    const div = document.createElement("div");
+    div.textContent = text;
+    return div.innerHTML;
+  }
+
+  function nl2br(text) {
+    return escapeHtml(text).replace(/\n/g, "<br>");
+  }
+
+  function addMessage(role, content, options = {}) {
+    const div = document.createElement("div");
+    div.className = "message message--" + role + (options.loading ? " message--loading" : "");
+    div.setAttribute("data-role", role);
+
+    const bubble = document.createElement("div");
+    bubble.className = "message__bubble";
+    if (options.markdown) {
+      bubble.innerHTML = nl2br(content);
+    } else {
+      bubble.innerHTML = "<p>" + nl2br(content) + "</p>";
+    }
+    div.appendChild(bubble);
+    messagesEl.appendChild(div);
+    messagesEl.scrollTop = messagesEl.scrollHeight;
+    return div;
+  }
+
+  function setMessageContent(el, content) {
+    const bubble = el.querySelector(".message__bubble");
+    if (!bubble) return;
+    el.classList.remove("message--loading");
+    bubble.innerHTML = "<p>" + nl2br(content) + "</p>";
"; + messagesEl.scrollTop = messagesEl.scrollHeight; + } + + async function sendMessage(text) { + if (!text.trim()) return; + + addMessage("user", text.trim()); + input.value = ""; + input.style.height = "auto"; + + const loadingEl = addMessage("assistant", "", { loading: true }); + submitBtn.disabled = true; + + try { + const res = await fetch(API_BASE + "/chat", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ message: text.trim() }), + }); + const data = await res.json(); + if (data.reply) { + setMessageContent(loadingEl, data.reply); + } else { + setMessageContent(loadingEl, "Sorry, something went wrong. " + (data.detail || "")); + } + } catch (err) { + setMessageContent( + loadingEl, + "Could not reach the API. Is it running? Start with: cd api && uvicorn main:app --reload" + ); + } finally { + submitBtn.disabled = false; + input.focus(); + } + } + + form.addEventListener("submit", function (e) { + e.preventDefault(); + sendMessage(input.value); + }); + + input.addEventListener("input", function () { + input.style.height = "auto"; + input.style.height = Math.min(input.scrollHeight, 12 * 24) + "px"; + }); + + input.addEventListener("keydown", function (e) { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + form.requestSubmit(); + } + }); +})(); diff --git a/frontend/index.html b/frontend/index.html new file mode 100644 index 0000000..da474ba --- /dev/null +++ b/frontend/index.html @@ -0,0 +1,49 @@ + + + + + + FERB — Kernel Optimization + + + + + + +
diff --git a/frontend/index.html b/frontend/index.html
new file mode 100644
index 0000000..da474ba
--- /dev/null
+++ b/frontend/index.html
@@ -0,0 +1,49 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="UTF-8" />
+    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
+    <title>FERB — Kernel Optimization</title>
+    <link rel="stylesheet" href="style.css" />
+  </head>
+  <body>
+    <div class="app">
+      <header class="header">
+        <span class="logo">FERB</span>
+        <span class="tagline">Kernel optimization &amp; faster ML training</span>
+      </header>
+
+      <main class="main">
+        <div class="messages" id="messages">
+          <div class="message message--assistant" data-role="assistant">
+            <div class="message__bubble">
+              <p>Ask about problems, NVSHMEM, Modal runs, MoE, or how to train models faster.</p>
+            </div>
+          </div>
+        </div>
+
+        <form class="input-wrap" id="chatForm">
+          <textarea class="input" id="input" rows="1"></textarea>
+          <button class="submit" id="submit" type="submit">
+            <span class="submit__arrow">→</span>
+          </button>
+        </form>
+      </main>
+
+      <footer class="footer">
+        <span class="footer__text">Whitespace AI · FERB</span>
+      </footer>
+    </div>
+
+    <script src="app.js"></script>
+  </body>
+</html>
+ + + diff --git a/frontend/style.css b/frontend/style.css new file mode 100644 index 0000000..bd32f87 --- /dev/null +++ b/frontend/style.css @@ -0,0 +1,233 @@ +/* --- Variables --- */ +:root { + --bg: #fafaf9; + --bg-elevated: #ffffff; + --text: #1c1917; + --text-muted: #78716c; + --border: #e7e5e4; + --accent: #0f766e; + --accent-hover: #0d9488; + --radius: 12px; + --font-sans: "DM Sans", system-ui, sans-serif; + --font-mono: "JetBrains Mono", ui-monospace, monospace; + --space-xs: 0.25rem; + --space-sm: 0.5rem; + --space-md: 1rem; + --space-lg: 1.5rem; + --space-xl: 2rem; + --space-2xl: 3rem; + --space-3xl: 4rem; + --space-4xl: 6rem; + --space-5xl: 8rem; + --max-width: 42rem; + --input-min-height: 52px; +} + +/* --- Reset & base --- */ +*, *::before, *::after { + box-sizing: border-box; +} + +html { + font-size: 18px; + -webkit-font-smoothing: antialiased; +} + +body { + margin: 0; + min-height: 100vh; + font-family: var(--font-sans); + font-weight: 400; + color: var(--text); + background: var(--bg); + line-height: 1.6; +} + +/* --- Layout --- */ +.app { + min-height: 100vh; + display: flex; + flex-direction: column; + max-width: 100vw; +} + +.header { + flex-shrink: 0; + padding: var(--space-3xl) var(--space-xl) var(--space-2xl); + text-align: center; + border-bottom: 1px solid var(--border); +} + +.logo { + display: block; + font-size: 1rem; + font-weight: 500; + letter-spacing: 0.12em; + text-transform: uppercase; + color: var(--text-muted); + margin-bottom: var(--space-sm); +} + +.tagline { + font-size: 0.95rem; + font-weight: 300; + color: var(--text-muted); +} + +.main { + flex: 1; + display: flex; + flex-direction: column; + width: 100%; + max-width: var(--max-width); + margin: 0 auto; + padding: var(--space-4xl) var(--space-xl) var(--space-2xl); + min-height: 0; +} + +/* --- Messages --- */ +.messages { + flex: 1; + display: flex; + flex-direction: column; + gap: var(--space-3xl); + padding-bottom: var(--space-2xl); + overflow-y: auto; +} + +.message { + display: flex; + width: 100%; +} + +.message--user { + justify-content: flex-end; +} + +.message--user .message__bubble { + background: var(--text); + color: var(--bg); + border-radius: var(--radius) var(--radius) var(--radius) 2px; +} + +.message--assistant .message__bubble { + background: var(--bg-elevated); + color: var(--text); + border: 1px solid var(--border); + border-radius: var(--radius) var(--radius) 2px var(--radius); +} + +.message__bubble { + max-width: 85%; + padding: var(--space-lg) var(--space-xl); + font-size: 0.95rem; + white-space: pre-wrap; + word-break: break-word; +} + +.message__bubble p { + margin: 0 0 var(--space-sm); +} + +.message__bubble p:last-child { + margin-bottom: 0; +} + +.message--loading .message__bubble::after { + content: ""; + display: inline-block; + width: 4px; + height: 1em; + background: var(--text-muted); + animation: blink 0.8s ease-in-out infinite; + margin-left: 2px; + vertical-align: -0.2em; +} + +@keyframes blink { + 0%, 100% { opacity: 0.3; } + 50% { opacity: 1; } +} + +/* --- Input --- */ +.input-wrap { + flex-shrink: 0; + display: flex; + align-items: flex-end; + gap: var(--space-md); + padding: var(--space-md) 0; + border-top: 1px solid var(--border); +} + +.input { + flex: 1; + min-height: var(--input-min-height); + max-height: 12rem; + padding: var(--space-md) var(--space-lg); + font-family: var(--font-sans); + font-size: 0.95rem; + color: var(--text); + background: var(--bg-elevated); + border: 1px solid var(--border); + border-radius: var(--radius); + resize: none; + 
transition: border-color 0.15s ease, box-shadow 0.15s ease; +} + +.input::placeholder { + color: var(--text-muted); +} + +.input:focus { + outline: none; + border-color: var(--accent); + box-shadow: 0 0 0 2px rgba(15, 118, 110, 0.12); +} + +.submit { + flex-shrink: 0; + width: var(--input-min-height); + height: var(--input-min-height); + display: flex; + align-items: center; + justify-content: center; + font-size: 1.25rem; + color: var(--bg); + background: var(--accent); + border: none; + border-radius: var(--radius); + cursor: pointer; + transition: background 0.15s ease, transform 0.1s ease; +} + +.submit:hover { + background: var(--accent-hover); +} + +.submit:active { + transform: scale(0.98); +} + +.submit:disabled { + opacity: 0.5; + cursor: not-allowed; + transform: none; +} + +.submit__arrow { + line-height: 1; +} + +/* --- Footer --- */ +.footer { + flex-shrink: 0; + padding: var(--space-xl); + text-align: center; + border-top: 1px solid var(--border); +} + +.footer__text { + font-size: 0.8rem; + color: var(--text-muted); + font-weight: 300; +} From 29f95b603fa53e2f75b01b163140698d892a6383 Mon Sep 17 00:00:00 2001 From: rohkymntn Date: Sat, 14 Feb 2026 15:35:46 -0800 Subject: [PATCH 2/2] Add Modal evaluator, IDE panel, and evaluator heartbeats Route Triton evaluations through Modal H100x8, add live evaluator heartbeat events, improve local CPU fallback for reference backend, and upgrade the frontend to a left-chat/right-IDE layout with live typing. Co-authored-by: Cursor --- .gitignore | 13 + api/README.md | 53 ++ api/__pycache__/agentic.cpython-311.pyc | Bin 11722 -> 0 bytes api/__pycache__/main.cpython-311.pyc | Bin 6296 -> 0 bytes api/agentic.py | 660 +++++++++++++++++- api/main.py | 43 ++ api/requirements.txt | 1 + frontend/app.js | 298 +++++++- frontend/index.html | 51 +- frontend/style.css | 89 ++- reference/__pycache__/1.cpython-310.pyc | Bin 1246 -> 0 bytes scripts/benchmark_candidate.py | 261 +++++++ scripts/modal_benchmark.py | 149 ++++ scripts/worker.py | 175 +++++ .../construct_prompt.cpython-310.pyc | Bin 6265 -> 0 bytes .../construct_prompt.cpython-312.pyc | Bin 7277 -> 0 bytes utils/init_and_finalize_backends.py | 37 +- 17 files changed, 1757 insertions(+), 73 deletions(-) create mode 100644 .gitignore delete mode 100644 api/__pycache__/agentic.cpython-311.pyc delete mode 100644 api/__pycache__/main.cpython-311.pyc delete mode 100644 reference/__pycache__/1.cpython-310.pyc create mode 100644 scripts/benchmark_candidate.py create mode 100644 scripts/modal_benchmark.py create mode 100644 scripts/worker.py delete mode 100644 utils/__pycache__/construct_prompt.cpython-310.pyc delete mode 100644 utils/__pycache__/construct_prompt.cpython-312.pyc diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1cdf221 --- /dev/null +++ b/.gitignore @@ -0,0 +1,13 @@ +.DS_Store + +# Python bytecode / caches +__pycache__/ +*.py[cod] + +# Local agent run artifacts +.agent_runs/ + +# Local virtualenvs +.venv/ +venv/ + diff --git a/api/README.md b/api/README.md index 751e8e4..22f882c 100644 --- a/api/README.md +++ b/api/README.md @@ -4,6 +4,7 @@ This API now supports: - `POST /chat` - GPT-backed kernel optimization chat - `POST /optimize` - iterative agent loop for solution improvement +- `POST /optimize/stream` - live event stream of agent iterations ## Setup @@ -13,6 +14,10 @@ python -m venv .venv source .venv/bin/activate pip install -r requirements.txt export OPENAI_API_KEY="your_key_here" +# If you plan to run evaluator on Modal (recommended for Triton backend): 
+modal token new
+# Optional but recommended: point to a Python with torch installed for evaluator runs
+export FERB_EVAL_PYTHON="/absolute/path/to/python-with-torch"
 uvicorn main:app --reload --host 0.0.0.0 --port 8000
 ```
@@ -57,3 +62,51 @@ Outputs are saved in:
 
 - `/Users/rohk/FERB/.agent_runs/<run_id>/candidate_iter_<iteration>.py`
 
+### Real speedup evaluator (recommended)
+
+Use the provided distributed benchmark as the evaluator:
+
+```bash
+torchrun --nproc-per-node 8 /Users/rohk/FERB/scripts/benchmark_candidate.py \
+  --problem 1 \
+  --candidate {candidate_path} \
+  --rows 1024 --cols 1024 --dtype float32 \
+  --warmup 3 --iters 10 --score-only
+```
+
+For API usage, pass that whole string as `evaluator_command`.
+
+## `POST /optimize/stream` (live thinking/run events)
+
+This endpoint streams JSON events using Server-Sent Events (SSE), so you can watch:
+
+- run start
+- iteration start
+- candidate generated
+- evaluation start/completed (if an evaluator is configured)
+- best update
+- run completed
+
+Example:
+
+```bash
+curl -N -X POST "http://127.0.0.1:8000/optimize/stream" \
+  -H "Content-Type: application/json" \
+  -d '{
+    "objective": "Improve throughput for problem 1 while preserving correctness",
+    "problem_id": 1,
+    "iterations": 3,
+    "model": "gpt-4o-mini"
+  }'
+```
+
+You will receive events like:
+
+```text
+data: {"type":"run_started", ...}
+data: {"type":"iteration_started","iteration":1}
+data: {"type":"candidate_generated","iteration":1,...}
+data: {"type":"best_updated","iteration":1,...}
+data: {"type":"run_completed","result":{...}}
+```
+
diff --git a/api/__pycache__/agentic.cpython-311.pyc b/api/__pycache__/agentic.cpython-311.pyc
deleted file mode 100644
index 910e83d..0000000
Binary files a/api/__pycache__/agentic.cpython-311.pyc and /dev/null differ
diff --git a/api/__pycache__/main.cpython-311.pyc b/api/__pycache__/main.cpython-311.pyc
deleted file mode 100644
index 62d2e78..0000000
Binary files a/api/__pycache__/main.cpython-311.pyc and /dev/null differ
diff --git a/api/agentic.py b/api/agentic.py
index 95ad6c1..50272e3 100644
--- a/api/agentic.py
+++ b/api/agentic.py
@@ -11,10 +11,16 @@
 import json
 import os
 import re
+import shlex
+import shutil
 import subprocess
+import selectors
+import sys
+import time
 import uuid
 from dataclasses import dataclass
 from pathlib import Path
+from typing import Iterator
 from typing import Any
 
 from openai import OpenAI
@@ -22,6 +28,7 @@
 
 ROOT_DIR = Path(__file__).resolve().parent.parent
 REFERENCE_DIR = ROOT_DIR / "reference"
+TRITON_DIR = ROOT_DIR / "solutions_triton"
 RUNS_DIR = ROOT_DIR / ".agent_runs"
@@ -48,6 +55,14 @@ class IterationResult:
     model_feedback: str
 
 
+def _clip(text: str, limit: int) -> str:
+    if limit <= 0:
+        return text or ""
+    if len(text) <= limit:
+        return text
+    return text[:limit] + "\n... 
[truncated]" + + def _openai_client() -> OpenAI: api_key = os.environ.get("OPENAI_API_KEY", "").strip() if not api_key: @@ -66,6 +81,17 @@ def _extract_python_code(text: str) -> str: return src +def _extract_plan_and_code(text: str) -> tuple[str, str]: + src = (text or "").strip() + marker = "###CODE_START" + if marker in src: + head, tail = src.split(marker, 1) + plan = head.strip() + code = _extract_python_code(tail.strip()) + return plan, code + return "No explicit plan returned.", _extract_python_code(src) + + def _read_problem_reference(problem_id: int) -> str: path = REFERENCE_DIR / f"{problem_id}.py" if not path.exists(): @@ -73,6 +99,13 @@ def _read_problem_reference(problem_id: int) -> str: return path.read_text(encoding="utf-8") +def _read_triton_seed(problem_id: int) -> str: + path = TRITON_DIR / f"{problem_id}_triton.py" + if not path.exists(): + return "" + return path.read_text(encoding="utf-8") + + def _read_problem_descriptions() -> str: path = REFERENCE_DIR / "problems.md" if not path.exists(): @@ -101,8 +134,11 @@ def _generate_candidate( objective: str, problem_id: int, iteration_idx: int, + target_backend: str, previous_best_code: str | None, previous_feedback: str | None, + previous_eval_feedback: str | None, + quality_feedback: str | None, topology_json: str | None, ) -> tuple[str, str]: """ @@ -110,11 +146,20 @@ def _generate_candidate( """ client = _openai_client() reference_code = _read_problem_reference(problem_id) + triton_seed = _read_triton_seed(problem_id) problem_docs = _read_problem_descriptions() best_code_section = previous_best_code if previous_best_code else "# none yet" feedback_section = previous_feedback if previous_feedback else "# first iteration" + eval_feedback_section = previous_eval_feedback if previous_eval_feedback else "# no evaluator feedback yet" + quality_feedback_section = quality_feedback if quality_feedback else "# no quality issues from last attempt" topology_section = topology_json if topology_json else "{}" + backend_requirements = ( + "Target backend is triton+nvshmem. Use Triton kernels and NVSHMEM APIs; avoid plain NCCL-only all_reduce wrappers." + if target_backend == "triton" + else "Target backend is reference/pytorch distributed baseline." + ) + user_prompt = f""" Objective: {objective} @@ -144,7 +189,22 @@ def _generate_candidate( 1) Write an improved solution module. 2) Keep function signature compatible with reference. 3) Prioritize correctness first, then performance. -4) Return ONLY code. +4) Return with this exact format: +PLAN: <1-3 short bullets of what you'll try> +###CODE_START + + +Backend constraint: +{backend_requirements} + +Triton seed (if available): +{triton_seed if triton_seed else "# none"} + +Quality feedback from previous attempt: +{quality_feedback_section} + +Evaluator feedback from previous attempt (errors, logs, metrics): +{eval_feedback_section} """ response = client.responses.create( @@ -154,26 +214,8 @@ def _generate_candidate( {"role": "user", "content": user_prompt}, ], ) - code = _extract_python_code(response.output_text) - - feedback_prompt = f""" -You proposed this candidate. 
Give a short self-critique with: -- likely strengths -- likely risks -- what to change next iteration - -Candidate code: -{code} -""" - feedback_resp = client.responses.create( - model=model, - input=[ - {"role": "system", "content": "You are a strict code reviewer for FERB kernels."}, - {"role": "user", "content": feedback_prompt}, - ], - ) - feedback = (feedback_resp.output_text or "").strip() - return code, feedback + plan_text, code = _extract_plan_and_code(response.output_text) + return code, plan_text def _parse_score(stdout: str) -> float | None: @@ -208,11 +250,143 @@ def _parse_score(stdout: str) -> float | None: return None -def _run_evaluator(command_template: str, candidate_path: Path, timeout_s: int) -> tuple[float | None, str, str]: +def _normalize_evaluator_command(command: str) -> str: + """ + If torchrun is unavailable, replace it with: + python -m torch.distributed.run ... + """ + if shutil.which("torchrun"): + return command + try: + tokens = shlex.split(command) + except Exception: + return command + if not tokens: + return command + if tokens[0] != "torchrun": + return command + py = _find_python_with_torch() or (sys.executable or "python3") + rewritten = [py, "-m", "torch.distributed.run"] + tokens[1:] + return " ".join(shlex.quote(t) for t in rewritten) + + +def _python_has_torch(python_exe: str) -> bool: + try: + proc = subprocess.run( + [python_exe, "-c", "import torch; print(torch.__version__)"], + capture_output=True, + text=True, + timeout=10, + ) + return proc.returncode == 0 + except Exception: + return False + + +def _find_python_with_torch() -> str | None: + """ + Find a Python interpreter with torch installed. + Priority: + 1) FERB_EVAL_PYTHON env var + 2) python3 in PATH + 3) python in PATH + 4) current interpreter + """ + candidates: list[str] = [] + env_py = os.environ.get("FERB_EVAL_PYTHON", "").strip() + if env_py: + candidates.append(env_py) + for name in ("python3", "python"): + p = shutil.which(name) + if p: + candidates.append(p) + if sys.executable: + candidates.append(sys.executable) + + seen: set[str] = set() + for c in candidates: + if c in seen: + continue + seen.add(c) + if _python_has_torch(c): + return c + return None + + +def _candidate_quality_issues(code: str, target_backend: str) -> list[str]: + src = (code or "").lower() + issues: list[str] = [] + if not src.strip(): + issues.append("empty candidate code") + return issues + if "def solution" not in src: + issues.append("missing solution(...) function") + if target_backend == "triton": + if "import triton" not in src: + issues.append("missing Triton import") + if "nvshmem" not in src: + issues.append("missing NVSHMEM usage") + if "dist.all_reduce" in src and "triton" not in src: + issues.append("NCCL-only fallback detected; expected triton/nvshmem solution") + return issues + + +def _is_infra_blocker(stderr: str, target_backend: str) -> str | None: + s = (stderr or "").lower() + if "token missing" in s and "modal" in s: + return "Modal authentication token missing (modal CLI not logged in)." + if "could not authenticate client" in s and "modal" in s: + return "Modal authentication failed (check your modal token credentials)." + if target_backend == "triton": + if "no module named 'triton'" in s or "no module named triton" in s: + return "Evaluator environment is missing Triton." + if "no module named 'nvshmem'" in s or "no module named nvshmem" in s: + return "Evaluator environment is missing NVSHMEM bindings." 
+ if "torch.cuda" in s and ("not available" in s or "not compiled" in s): + return "Evaluator environment has no CUDA." + if "torch.cuda.set_device" in s: + return "Evaluator environment cannot bind CUDA devices (likely no GPU)." + return None + + +def _rewrite_python_launcher(command: str, python_exe: str | None) -> str: + """ + If command begins with python/python3, replace with explicit interpreter. + """ + if not python_exe: + return command + try: + tokens = shlex.split(command) + except Exception: + return command + if not tokens: + return command + head = tokens[0] + if head in {"python", "python3"}: + tokens[0] = python_exe + return " ".join(shlex.quote(t) for t in tokens) + return command + + +def _run_evaluator( + command_template: str, + candidate_path: Path, + timeout_s: int, + evaluator_python: str | None = None, +) -> tuple[float, str, str]: """ Run evaluator command and parse score from stdout. """ command = command_template.format(candidate_path=str(candidate_path)) + py_with_torch = None + if evaluator_python: + # Only honor explicit evaluator_python if it exists and imports torch. + if os.path.exists(evaluator_python) and _python_has_torch(evaluator_python): + py_with_torch = evaluator_python + if py_with_torch is None: + py_with_torch = _find_python_with_torch() + command = _rewrite_python_launcher(command, py_with_torch) + command = _normalize_evaluator_command(command) proc = subprocess.run( command, shell=True, @@ -224,18 +398,161 @@ def _run_evaluator(command_template: str, candidate_path: Path, timeout_s: int) stdout = proc.stdout or "" stderr = proc.stderr or "" score = _parse_score(stdout) + if score is None: + score = 0.0 + if proc.returncode != 0: + if stderr: + stderr = stderr + f"\n[evaluator_exit_code={proc.returncode}]" + else: + stderr = f"[evaluator_exit_code={proc.returncode}]" + if py_with_torch is None: + extra = ( + "\n[no_python_with_torch_found] Set FERB_EVAL_PYTHON to a Python interpreter " + "that has torch installed." + ) + stderr = (stderr + extra) if stderr else extra.lstrip() return score, stdout, stderr +def _run_evaluator_live( + command_template: str, + candidate_path: Path, + timeout_s: int, + evaluator_python: str | None = None, + *, + heartbeat_every_s: float = 2.0, + tail_chars: int = 1200, +) -> Iterator[dict[str, Any]]: + """ + Run evaluator command while yielding periodic heartbeats. + + This is mainly to keep the SSE UI "alive" while long-running evaluators + (e.g. `modal run ...`) build images / wait for GPUs / execute benchmarks. 
+ """ + command = command_template.format(candidate_path=str(candidate_path)) + py_with_torch = None + if evaluator_python: + if os.path.exists(evaluator_python) and _python_has_torch(evaluator_python): + py_with_torch = evaluator_python + if py_with_torch is None: + py_with_torch = _find_python_with_torch() + command = _rewrite_python_launcher(command, py_with_torch) + command = _normalize_evaluator_command(command) + + start = time.time() + last_hb = 0.0 + stdout_chunks: list[str] = [] + stderr_chunks: list[str] = [] + + proc = subprocess.Popen( + command, + shell=True, + cwd=str(ROOT_DIR), + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, + text=True, + bufsize=1, + ) + + assert proc.stdout is not None + assert proc.stderr is not None + + sel = selectors.DefaultSelector() + sel.register(proc.stdout, selectors.EVENT_READ, data="stdout") + sel.register(proc.stderr, selectors.EVENT_READ, data="stderr") + + def _tail(txt: str) -> str: + if tail_chars <= 0: + return "" + if len(txt) <= tail_chars: + return txt + return txt[-tail_chars:] + + while True: + now = time.time() + elapsed = now - start + if elapsed > timeout_s: + try: + proc.kill() + except Exception: + pass + stderr_chunks.append(f"\n[evaluator_timeout_s={timeout_s}]") + break + + # Drain any available output. + events = sel.select(timeout=0.5) + for key, _mask in events: + stream_name = key.data + try: + line = key.fileobj.readline() + except Exception: + line = "" + if not line: + continue + if stream_name == "stdout": + stdout_chunks.append(line) + else: + stderr_chunks.append(line) + + if (now - last_hb) >= heartbeat_every_s: + last_hb = now + out = "".join(stdout_chunks) + err = "".join(stderr_chunks) + yield { + "elapsed_s": int(elapsed), + "stdout_tail": _tail(out), + "stderr_tail": _tail(err), + } + + rc = proc.poll() + if rc is not None: + # Drain remaining buffered output + for stream, name in ((proc.stdout, "stdout"), (proc.stderr, "stderr")): + try: + rest = stream.read() + except Exception: + rest = "" + if not rest: + continue + if name == "stdout": + stdout_chunks.append(rest) + else: + stderr_chunks.append(rest) + break + + stdout = "".join(stdout_chunks) + stderr = "".join(stderr_chunks) + score = _parse_score(stdout) + if score is None: + score = 0.0 + if proc.returncode not in (0, None): + if stderr: + stderr = stderr + f"\n[evaluator_exit_code={proc.returncode}]" + else: + stderr = f"[evaluator_exit_code={proc.returncode}]" + if py_with_torch is None: + extra = ( + "\n[no_python_with_torch_found] Set FERB_EVAL_PYTHON to a Python interpreter " + "that has torch installed." + ) + stderr = (stderr + extra) if stderr else extra.lstrip() + return (score, stdout, stderr) + + def run_agentic_optimization( *, objective: str, problem_id: int, iterations: int = 3, model: str = "gpt-4o-mini", + target_backend: str = "triton", topology_json_path: str | None = None, evaluator_command: str | None = None, evaluator_timeout_s: int = 240, + evaluator_python: str | None = None, + include_full_code: bool = False, + include_trace_output: bool = True, + trace_text_limit: int = 0, ) -> dict[str, Any]: """ Iterative generate/evaluate/select loop. 
@@ -257,22 +574,35 @@ def run_agentic_optimization( best_score: float | None = None best_candidate_path: str | None = None previous_feedback: str | None = None + previous_eval_feedback: str | None = None trace: list[IterationResult] = [] for idx in range(1, iterations + 1): - code, model_feedback = _generate_candidate( - model=model, - objective=objective, - problem_id=problem_id, - iteration_idx=idx, - previous_best_code=best_code, - previous_feedback=previous_feedback, - topology_json=topology_json, - ) + quality_feedback = None + model_feedback = "" + code = "" + for _attempt in range(1, 4): + code, model_feedback = _generate_candidate( + model=model, + objective=objective, + problem_id=problem_id, + iteration_idx=idx, + target_backend=target_backend, + previous_best_code=best_code, + previous_feedback=previous_feedback, + previous_eval_feedback=previous_eval_feedback, + quality_feedback=quality_feedback, + topology_json=topology_json, + ) + issues = _candidate_quality_issues(code, target_backend) + if not issues: + break + quality_feedback = "Quality issues: " + "; ".join(issues) + candidate_path = run_dir / f"candidate_iter_{idx}.py" candidate_path.write_text(code, encoding="utf-8") - score = None + score = 0.0 eval_stdout = "" eval_stderr = "" if evaluator_command: @@ -280,12 +610,41 @@ def run_agentic_optimization( evaluator_command, candidate_path, timeout_s=evaluator_timeout_s, + evaluator_python=evaluator_python, + ) + previous_eval_feedback = ( + "Evaluator stdout:\n" + + (eval_stdout or "") + + "\n\nEvaluator stderr:\n" + + (eval_stderr or "") + + f"\n\nScore: {score}" ) + blocker = _is_infra_blocker(eval_stderr, target_backend) + if blocker: + # Don't keep iterating when the evaluator environment can't run the target backend. + if best_code is None: + best_code = code + best_score = score + best_candidate_path = str(candidate_path) + previous_feedback = model_feedback + trace.append( + IterationResult( + iteration=idx, + candidate_path=str(candidate_path), + score=score, + evaluator_stdout=eval_stdout, + evaluator_stderr=(eval_stderr or "") + f"\n[blocked] {blocker}", + model_feedback=model_feedback, + ) + ) + break + else: + previous_eval_feedback = None choose_new_best = False if best_code is None: choose_new_best = True - elif score is not None and (best_score is None or score > best_score): + elif best_score is None or score > best_score: choose_new_best = True if choose_new_best: @@ -305,7 +664,234 @@ def run_agentic_optimization( ) ) - return { + result = { + "run_id": run_id, + "run_dir": str(run_dir), + "problem_id": problem_id, + "objective": objective, + "iterations": iterations, + "model": model, + "best_score": best_score, + "best_candidate_path": best_candidate_path, + "trace": [ + { + "iteration": t.iteration, + "candidate_path": t.candidate_path, + "score": t.score, + "model_feedback": _clip(t.model_feedback, trace_text_limit), + "evaluator_stdout": ( + _clip(t.evaluator_stdout, trace_text_limit) if include_trace_output else "" + ), + "evaluator_stderr": ( + _clip(t.evaluator_stderr, trace_text_limit) if include_trace_output else "" + ), + } + for t in trace + ], + } + if include_full_code: + result["best_code"] = best_code + return result + + +def stream_agentic_optimization_events( + *, + objective: str, + problem_id: int, + iterations: int = 3, + model: str = "gpt-4o-mini", + target_backend: str = "triton", + topology_json_path: str | None = None, + evaluator_command: str | None = None, + evaluator_timeout_s: int = 240, + evaluator_python: str | None = None, 
+ feedback_preview_chars: int = 1200, +) -> Iterator[dict[str, Any]]: + """ + Stream per-iteration events for real-time visibility. + """ + if iterations < 1: + raise ValueError("iterations must be >= 1") + + topology_json = None + if topology_json_path: + topo_path = Path(topology_json_path) + if topo_path.exists(): + topology_json = topo_path.read_text(encoding="utf-8") + + run_id = uuid.uuid4().hex[:12] + run_dir = RUNS_DIR / run_id + run_dir.mkdir(parents=True, exist_ok=True) + + best_code: str | None = None + best_score: float | None = None + best_candidate_path: str | None = None + previous_feedback: str | None = None + previous_eval_feedback: str | None = None + trace: list[IterationResult] = [] + + yield { + "type": "run_started", + "run_id": run_id, + "run_dir": str(run_dir), + "objective": objective, + "problem_id": problem_id, + "iterations": iterations, + "model": model, + "target_backend": target_backend, + } + + for idx in range(1, iterations + 1): + yield {"type": "iteration_started", "iteration": idx} + + quality_feedback = None + code = "" + model_feedback = "" + for attempt in range(1, 4): + code, model_feedback = _generate_candidate( + model=model, + objective=objective, + problem_id=problem_id, + iteration_idx=idx, + target_backend=target_backend, + previous_best_code=best_code, + previous_feedback=previous_feedback, + previous_eval_feedback=previous_eval_feedback, + quality_feedback=quality_feedback, + topology_json=topology_json, + ) + yield { + "type": "agent_thought", + "iteration": idx, + "attempt": attempt, + "text": model_feedback, + } + issues = _candidate_quality_issues(code, target_backend) + if not issues: + break + quality_feedback = "Quality issues: " + "; ".join(issues) + yield { + "type": "quality_reject", + "iteration": idx, + "attempt": attempt, + "issues": issues, + } + + candidate_path = run_dir / f"candidate_iter_{idx}.py" + candidate_path.write_text(code, encoding="utf-8") + yield { + "type": "candidate_generated", + "iteration": idx, + "candidate_path": str(candidate_path), + "feedback_preview": _clip(model_feedback, feedback_preview_chars), + "candidate_code_preview": _clip(code, 20000), + } + + score = 0.0 + eval_stdout = "" + eval_stderr = "" + if evaluator_command: + yield { + "type": "evaluation_started", + "iteration": idx, + "command": evaluator_command, + } + timeout_s = evaluator_timeout_s + if "modal run" in (evaluator_command or "") and timeout_s < 1800: + # Modal runs often include image build/pull and GPU scheduling latency. 
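+                # Give them at least 30 minutes (1800 s) before treating the run as hung.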
+ timeout_s = 1800 + + live = _run_evaluator_live( + evaluator_command, + candidate_path, + timeout_s=timeout_s, + evaluator_python=evaluator_python, + heartbeat_every_s=2.0, + tail_chars=1500, + ) + while True: + try: + hb = next(live) + yield { + "type": "evaluation_heartbeat", + "iteration": idx, + **hb, + } + except StopIteration as stop: + score, eval_stdout, eval_stderr = stop.value + break + previous_eval_feedback = ( + "Evaluator stdout:\n" + + (eval_stdout or "") + + "\n\nEvaluator stderr:\n" + + (eval_stderr or "") + + f"\n\nScore: {score}" + ) + yield { + "type": "evaluation_completed", + "iteration": idx, + "score": score, + "stdout_preview": _clip(eval_stdout, 0), + "stderr_preview": _clip(eval_stderr, 0), + } + + blocker = _is_infra_blocker(eval_stderr, target_backend) + if blocker: + if best_code is None: + best_code = code + best_score = score + best_candidate_path = str(candidate_path) + yield { + "type": "blocked", + "iteration": idx, + "reason": blocker, + "hint": ( + "If this is a Modal auth issue, run `modal token new` locally (same environment running the API), " + "then rerun. For Triton/NVSHMEM acceleration you must evaluate on a CUDA multi-GPU environment " + "(e.g. Modal H100x8) with triton + nvshmem installed." + ), + } + break + else: + previous_eval_feedback = None + + choose_new_best = False + if best_code is None: + choose_new_best = True + elif best_score is None or score > best_score: + choose_new_best = True + + if choose_new_best: + best_code = code + best_score = score + best_candidate_path = str(candidate_path) + yield { + "type": "best_updated", + "iteration": idx, + "best_score": best_score, + "best_candidate_path": best_candidate_path, + } + else: + yield { + "type": "best_unchanged", + "iteration": idx, + "best_score": best_score, + "best_candidate_path": best_candidate_path, + } + + previous_feedback = model_feedback + trace.append( + IterationResult( + iteration=idx, + candidate_path=str(candidate_path), + score=score, + evaluator_stdout=eval_stdout, + evaluator_stderr=eval_stderr, + model_feedback=model_feedback, + ) + ) + + result = { "run_id": run_id, "run_dir": str(run_dir), "problem_id": problem_id, @@ -314,16 +900,14 @@ def run_agentic_optimization( "model": model, "best_score": best_score, "best_candidate_path": best_candidate_path, - "best_code": best_code, "trace": [ { "iteration": t.iteration, "candidate_path": t.candidate_path, "score": t.score, - "model_feedback": t.model_feedback, - "evaluator_stdout": t.evaluator_stdout, - "evaluator_stderr": t.evaluator_stderr, + "model_feedback": _clip(t.model_feedback, 0), } for t in trace ], } + yield {"type": "run_completed", "result": result} diff --git a/api/main.py b/api/main.py index 007cbfa..6140b71 100644 --- a/api/main.py +++ b/api/main.py @@ -5,6 +5,7 @@ """ import os +import json from contextlib import asynccontextmanager from pathlib import Path @@ -12,11 +13,13 @@ from fastapi import FastAPI from fastapi import HTTPException from fastapi.middleware.cors import CORSMiddleware +from fastapi.responses import StreamingResponse from fastapi.staticfiles import StaticFiles from pydantic import BaseModel, Field from agentic import gpt_chat_reply from agentic import run_agentic_optimization +from agentic import stream_agentic_optimization_events # --------------------------------------------------------------------------- @@ -38,9 +41,13 @@ class OptimizeRequest(BaseModel): problem_id: int = Field(..., ge=1) iterations: int = Field(default=3, ge=1, le=10) model: str = "gpt-4o-mini" + 
+    target_backend: str = "triton"
     topology_json_path: str | None = None
     evaluator_command: str | None = None
     evaluator_timeout_s: int = Field(default=240, ge=10, le=3600)
+    evaluator_python: str | None = None
+    include_full_code: bool = False
+    include_trace_output: bool = False


class OptimizeResponse(BaseModel):
@@ -106,15 +113,51 @@ def optimize(body: OptimizeRequest) -> OptimizeResponse:
             problem_id=body.problem_id,
             iterations=body.iterations,
             model=body.model,
+            target_backend=body.target_backend,
             topology_json_path=body.topology_json_path,
             evaluator_command=body.evaluator_command,
             evaluator_timeout_s=body.evaluator_timeout_s,
+            evaluator_python=body.evaluator_python,
+            include_full_code=body.include_full_code,
+            include_trace_output=body.include_trace_output,
         )
         return OptimizeResponse(ok=True, result=result)
     except Exception as exc:
         raise HTTPException(status_code=500, detail=f"optimization failed: {exc}") from exc


+@app.post("/optimize/stream")
+def optimize_stream(body: OptimizeRequest) -> StreamingResponse:
+    """
+    Stream live agent iteration events via Server-Sent Events (SSE).
+    """
+    if not os.environ.get("OPENAI_API_KEY", "").strip():
+        raise HTTPException(
+            status_code=400,
+            detail="OPENAI_API_KEY is required for /optimize/stream",
+        )
+
+    def _event_stream():
+        try:
+            for event in stream_agentic_optimization_events(
+                objective=body.objective,
+                problem_id=body.problem_id,
+                iterations=body.iterations,
+                model=body.model,
+                target_backend=body.target_backend,
+                topology_json_path=body.topology_json_path,
+                evaluator_command=body.evaluator_command,
+                evaluator_timeout_s=body.evaluator_timeout_s,
+                evaluator_python=body.evaluator_python,
+            ):
+                yield f"data: {json.dumps(event)}\n\n"
+        except Exception as exc:
+            err_event = {"type": "error", "detail": str(exc)}
+            yield f"data: {json.dumps(err_event)}\n\n"
+
+    return StreamingResponse(_event_stream(), media_type="text/event-stream")
+
+
# Serve frontend (whitespace chatbot) when running from repo root
_frontend = Path(__file__).resolve().parent.parent / "frontend"
if _frontend.is_dir():
diff --git a/frontend/app.js b/frontend/app.js
index 362892a..a32f411 100644
--- a/frontend/app.js
+++ b/frontend/app.js
@@ -5,6 +5,10 @@
  const input = document.getElementById("input");
  const submitBtn = document.getElementById("submit");
  const messagesEl = document.getElementById("messages");
+  const codePanel = document.getElementById("codePanel");
+  const ideMeta = document.getElementById("ideMeta");
+  let runningOptimize = false;
+  let typingRunId = 0;

  function escapeHtml(text) {
    const div = document.createElement("div");
@@ -16,6 +20,16 @@
    return escapeHtml(text).replace(/\n/g, "<br/>");
"); } + function renderRichText(text) { + // Tiny safe markdown subset: bold, italic, inline code + newlines. + let html = escapeHtml(text || ""); + html = html.replace(/`([^`]+)`/g, "$1"); + html = html.replace(/\*\*([^*]+)\*\*/g, "$1"); + html = html.replace(/\*([^*]+)\*/g, "$1"); + html = html.replace(/\n/g, "
"); + return html; + } + function addMessage(role, content, options = {}) { const div = document.createElement("div"); div.className = "message message--" + role + (options.loading ? " message--loading" : ""); @@ -24,9 +38,9 @@ const bubble = document.createElement("div"); bubble.className = "message__bubble"; if (options.markdown) { - bubble.innerHTML = nl2br(content); + bubble.innerHTML = renderRichText(content); } else { - bubble.innerHTML = "

" + nl2br(content) + "

"; + bubble.innerHTML = "

" + renderRichText(content) + "

"; } div.appendChild(bubble); messagesEl.appendChild(div); @@ -34,11 +48,50 @@ return div; } + function updateIDE(metaText, codeText) { + if (ideMeta && typeof metaText === "string") ideMeta.textContent = metaText; + if (codePanel && typeof codeText === "string") codePanel.textContent = codeText; + } + + function isNearBottom(el, thresholdPx = 48) { + if (!el) return true; + return el.scrollTop + el.clientHeight >= el.scrollHeight - thresholdPx; + } + + function typeIntoCodePanel(fullText, opts = {}) { + if (!codePanel) return; + const text = String(fullText || ""); + const chunkSize = Math.max(1, Number(opts.chunkSize || 12)); + const tickMs = Math.max(8, Number(opts.tickMs || 16)); + + const myRun = ++typingRunId; + codePanel.classList.add("typing"); + codePanel.textContent = ""; + + let i = 0; + const timer = setInterval(() => { + if (myRun !== typingRunId) { + clearInterval(timer); + return; + } + if (i >= text.length) { + clearInterval(timer); + codePanel.classList.remove("typing"); + return; + } + + const keepScroll = isNearBottom(codePanel); + codePanel.textContent += text.slice(i, i + chunkSize); + i += chunkSize; + if (keepScroll) codePanel.scrollTop = codePanel.scrollHeight; + }, tickMs); + } + function setMessageContent(el, content) { const bubble = el.querySelector(".message__bubble"); if (!bubble) return; el.classList.remove("message--loading"); - bubble.innerHTML = "

" + nl2br(content) + "

"; + bubble.innerHTML = "

" + renderRichText(content) + "

"; messagesEl.scrollTop = messagesEl.scrollHeight; } @@ -49,6 +102,13 @@ input.value = ""; input.style.height = "auto"; + const trimmed = text.trim(); + if (trimmed.toLowerCase().startsWith("/optimize ")) { + const parsed = parseOptimizeCommand(trimmed); + await runOptimizeStream(parsed.objective, parsed.options); + return; + } + const loadingEl = addMessage("assistant", "", { loading: true }); submitBtn.disabled = true; @@ -75,6 +135,238 @@ } } + function tokenizeCommand(cmd) { + const re = /"[^"]*"|\S+/g; + const out = []; + const m = cmd.match(re) || []; + for (const t of m) { + if (t.startsWith("\"") && t.endsWith("\"") && t.length >= 2) out.push(t.slice(1, -1)); + else out.push(t); + } + return out; + } + + function parseOptimizeCommand(cmd) { + // Supported: + // /optimize [--backend triton|reference] [--problem N] [--iters N] [--nproc N] [--model NAME] [--no-eval] + const tokens = tokenizeCommand(cmd); + const options = { + problem_id: 1, + iterations: 3, + model: "gpt-4o-mini", + target_backend: "triton", + nproc_per_node: 1, + no_eval: false, + }; + + let i = 1; // skip /optimize + while (i < tokens.length) { + const tok = tokens[i]; + if (!tok.startsWith("--")) break; + const key = tok.slice(2).toLowerCase(); + if (key === "no-eval") { + options.no_eval = true; + i += 1; + continue; + } + const val = tokens[i + 1]; + if (val == null) break; + if (key === "backend") options.target_backend = String(val); + else if (key === "problem") options.problem_id = Number(val); + else if (key === "iters") options.iterations = Number(val); + else if (key === "nproc") options.nproc_per_node = Number(val); + else if (key === "model") options.model = String(val); + i += 2; + } + + const objective = tokens.slice(i).join(" ").trim(); + return { objective, options }; + } + + async function runOptimizeStream(objective, options = {}) { + if (!objective) { + addMessage( + "assistant", + "Usage: /optimize [--backend triton|reference] [--problem N] [--iters N] [--nproc N] [--model NAME] [--no-eval] " + ); + return; + } + if (runningOptimize) { + addMessage("assistant", "An optimize run is already in progress."); + return; + } + + runningOptimize = true; + submitBtn.disabled = true; + const statusEl = addMessage("assistant", "Starting agentic optimization run...", { loading: true }); + + try { + const target_backend = options.target_backend || "triton"; + const problem_id = Number.isFinite(options.problem_id) ? options.problem_id : 1; + const iterations = Number.isFinite(options.iterations) ? options.iterations : 3; + const model = options.model || "gpt-4o-mini"; + const nproc = Number.isFinite(options.nproc_per_node) ? options.nproc_per_node : 1; + const noEval = !!options.no_eval; + + const useModalEval = target_backend === "triton"; + + const res = await fetch(API_BASE + "/optimize/stream", { + method: "POST", + headers: { "Content-Type": "application/json" }, + body: JSON.stringify({ + objective, + problem_id, + iterations, + model, + target_backend, + evaluator_python: window.FERB_EVAL_PYTHON || null, + evaluator_command: noEval + ? null + : useModalEval + ? 
`modal run scripts/modal_benchmark.py --problem ${problem_id} --candidate {candidate_path} --rows 1024 --cols 1024 --dtype float32 --warmup 3 --iters 10` + : `python -m torch.distributed.run --nproc-per-node ${nproc} scripts/benchmark_candidate.py --problem ${problem_id} --candidate {candidate_path} --rows 1024 --cols 1024 --dtype float32 --warmup 3 --iters 10`, + }), + }); + + if (!res.ok || !res.body) { + const txt = await res.text(); + setMessageContent(statusEl, "Optimize stream failed: " + txt); + return; + } + + setMessageContent( + statusEl, + `Agent run started. Streaming steps below.\n\nbackend=${target_backend} problem=${problem_id} iters=${iterations} eval=${noEval ? "disabled" : (useModalEval ? "modal H100x8" : `local nproc=${nproc}`)}` + ); + + const reader = res.body.getReader(); + const decoder = new TextDecoder(); + let buffer = ""; + + while (true) { + const { value, done } = await reader.read(); + if (done) break; + buffer += decoder.decode(value, { stream: true }); + + const lines = buffer.split("\n"); + buffer = lines.pop() || ""; + + for (const line of lines) { + if (!line.startsWith("data: ")) continue; + const raw = line.slice(6); + if (!raw) continue; + let event; + try { + event = JSON.parse(raw); + } catch { + continue; + } + renderOptimizeEvent(event); + } + } + } catch (err) { + addMessage( + "assistant", + "Could not stream optimization. Ensure API is running and OPENAI_API_KEY is set." + ); + } finally { + runningOptimize = false; + submitBtn.disabled = false; + input.focus(); + } + } + + function renderOptimizeEvent(event) { + const t = event && event.type; + if (!t) return; + if (t === "run_started") { + addMessage("assistant", `Run ${event.run_id} started.\nModel: ${event.model}\nIterations: ${event.iterations}`); + return; + } + if (t === "iteration_started") { + addMessage("assistant", `Let's try iteration ${event.iteration}...`); + return; + } + if (t === "candidate_generated") { + updateIDE( + `iter ${event.iteration} · ${event.candidate_path || ""}`, + "" + ); + typeIntoCodePanel(event.candidate_code_preview || "", { chunkSize: 14, tickMs: 14 }); + addMessage( + "assistant", + `Iteration ${event.iteration}: candidate generated.\nPath: ${event.candidate_path}\n\n(Full code is in the Agent IDE pane.)\n\nAgent critique:\n${event.feedback_preview || ""}` + ); + return; + } + if (t === "agent_thought") { + addMessage("assistant", `Iteration ${event.iteration} attempt ${event.attempt} plan:\n${event.text || ""}`); + return; + } + if (t === "quality_reject") { + addMessage( + "assistant", + `Iteration ${event.iteration} attempt ${event.attempt} rejected by quality gate:\n- ${(event.issues || []).join("\n- ")}` + ); + return; + } + if (t === "evaluation_started") { + addMessage("assistant", `Iteration ${event.iteration}: running evaluator...`); + updateIDE(`iter ${event.iteration} · evaluating…`, codePanel ? codePanel.textContent : ""); + return; + } + if (t === "evaluation_heartbeat") { + const elapsed = Number.isFinite(event.elapsed_s) ? 
event.elapsed_s : 0; + if (ideMeta) ideMeta.textContent = `iter ${event.iteration} · evaluating… ${elapsed}s`; + return; + } + if (t === "evaluation_completed") { + let metricsLine = ""; + try { + const parsed = JSON.parse(event.stdout_preview || "{}"); + if (parsed && typeof parsed === "object" && "speedup" in parsed) { + metricsLine = + `\nallclose=${parsed.allclose} max_abs_diff=${parsed.max_abs_diff}` + + `\nreference_ms=${parsed.reference_ms} candidate_ms=${parsed.candidate_ms}` + + `\nspeedup=${parsed.speedup} score=${parsed.score}`; + } + } catch (_) { + // stdout may not be JSON; ignore + } + addMessage( + "assistant", + `Iteration ${event.iteration}: score=${event.score}${metricsLine}\nstdout:\n${event.stdout_preview || ""}\nstderr:\n${event.stderr_preview || ""}` + ); + return; + } + if (t === "blocked") { + addMessage( + "assistant", + `Blocked at iteration ${event.iteration}.\nReason: ${event.reason || ""}\n\nHint: ${event.hint || ""}` + ); + return; + } + if (t === "best_updated") { + addMessage("assistant", `New best at iteration ${event.iteration}. score=${event.best_score}`); + return; + } + if (t === "best_unchanged") { + addMessage("assistant", `No improvement at iteration ${event.iteration}. Current best score=${event.best_score}`); + return; + } + if (t === "run_completed") { + const r = event.result || {}; + addMessage( + "assistant", + `Run complete.\nBest score: ${r.best_score}\nBest candidate: ${r.best_candidate_path}\nRun dir: ${r.run_dir}` + ); + return; + } + if (t === "error") { + addMessage("assistant", `Run error: ${event.detail || "Unknown error"}`); + } + } + form.addEventListener("submit", function (e) { e.preventDefault(); sendMessage(input.value); diff --git a/frontend/index.html b/frontend/index.html index da474ba..72f60f3 100644 --- a/frontend/index.html +++ b/frontend/index.html @@ -17,27 +17,40 @@
[Markup in this hunk is not recoverable from this copy; only its text content survives. The hunk restructures the page into a two-pane layout: the existing chat column (the #messages list, the hint "Ask about problems, NVSHMEM, Modal runs, MoE, or how to train models faster.", and the input form) alongside a new Agent IDE pane holding the #ideMeta status line and the #codePanel code view that app.js drives. It also adds a second hint: "Run live agent loop: /optimize Improve throughput for problem 1 while preserving correctness".]