From eeed7c73fb19c66beb891e0c9c7f9239e0112a76 Mon Sep 17 00:00:00 2001 From: MohammedTaherMcW Date: Fri, 8 Aug 2025 10:25:54 +0000 Subject: [PATCH] Add Performance and Accuracy metrics for Gemma-3-4b-it --- .../single-card-demo-tests-impl.yaml | 2 ++ models/tt_transformers/PERF.md | 4 +++ .../tt_transformers/demo/simple_text_demo.py | 16 +++++++++++- .../demo/simple_vision_demo.py | 2 ++ .../reference_outputs/gemma-3-4b-it.refpt | Bin 0 -> 50684 bytes models/tt_transformers/tests/test_accuracy.py | 23 +++++++++++++++--- .../single_card/run_single_card_demo_tests.sh | 8 ++++++ 7 files changed, 50 insertions(+), 5 deletions(-) create mode 100644 models/tt_transformers/tests/reference_outputs/gemma-3-4b-it.refpt diff --git a/.github/workflows/single-card-demo-tests-impl.yaml b/.github/workflows/single-card-demo-tests-impl.yaml index 4dfc2d1005ba..7e3b10e6c374 100644 --- a/.github/workflows/single-card-demo-tests-impl.yaml +++ b/.github/workflows/single-card-demo-tests-impl.yaml @@ -92,6 +92,8 @@ jobs: # # Moved to t3k tests until OOM on single card runners resolved # { name: "qwen7b", runner-label: "N300", performance: false, cmd: run_qwen7b_func, owner_id: U03PUAKE719}, # Mark O'Connor { name: "qwen25_vl", runner-label: "N300", performance: true, cmd: run_qwen25_vl_func, owner_id: U07RY6B5FLJ}, #Gongyu Wang + # { name: "gemma3_4b", runner-label: "N300", performance: true, cmd: run_gemma3_4b_func, owner_id: }, # TODO Owner ID needs to be updated + ] name: ${{ matrix.test-group.name }}-${{ matrix.test-group.runner-label }}-${{ (matrix.test-group.performance && 'perf') || 'func' }} env: diff --git a/models/tt_transformers/PERF.md b/models/tt_transformers/PERF.md index 0a44dba88a60..279c62be81c4 100644 --- a/models/tt_transformers/PERF.md +++ b/models/tt_transformers/PERF.md @@ -45,6 +45,8 @@ This configuration uses bfp4 MLP and bfp8 attention weights for all models excep | Mistral-7B | N150 | 95 | 99 | 29.75 | 100.24 | | Mistral-7B | N300 | 95 | 99 | 47.01 | 65.95 | | 
Mistral-7B | T3K | 95 | 99 | 67.82 | 53.93 | +| gemma-3-4b | N150 | 67.0 | 80 | 28.00 | 81.00 | +| gemma-3-4b | N300 | 52.0 | 72.0 | 23.00 | 152 | ## Accuracy @@ -82,6 +84,8 @@ Llama 3 models test as insensitive to attention precision and so we use bfp8 att | Mistral-7B | N150 | 95 | 99 | 29.75 | 100.24 | | Mistral-7B | N300 | 95 | 99 | 47.01 | 65.95 | | Mistral-7B | T3K | 95 | 99 | 67.82 | 53.93 | +| gemma-3-4b | N150 | 67.0 | 80 | 28.00 | 81.00 | +| gemma-3-4b | N300 | 52.0 | 72.0 | 23.00 | 152 | ## Long-context (64K Tokens) diff --git a/models/tt_transformers/demo/simple_text_demo.py b/models/tt_transformers/demo/simple_text_demo.py index 95c4f013a244..12b35858ad91 100644 --- a/models/tt_transformers/demo/simple_text_demo.py +++ b/models/tt_transformers/demo/simple_text_demo.py @@ -945,7 +945,15 @@ def test_demo_text( ) # Benchmark targets - supported_models = ["Llama-3.2-1B", "Llama-3.2-3B", "Llama-3.1-8B", "Llama-3.2-11B", "Llama-3.1-70B", "Mistral-7B"] + supported_models = [ + "Llama-3.2-1B", + "Llama-3.2-3B", + "Llama-3.1-8B", + "Llama-3.2-11B", + "Llama-3.1-70B", + "Mistral-7B", + "gemma-3-4b", + ] supported_devices = ["N150", "P100", "P150", "P300", "N300", "P150x4", "T3K", "TG"] tt_device_name = determine_device_name(mesh_device) # submesh device should not decide performance target @@ -994,6 +1002,9 @@ def test_demo_text( "N300_Mistral-7B": 38, # TODO Update target "T3K_Mistral-7B": 45, # TODO Update target "TG_Mistral-7B": 45, # TODO Update target + # + "N150_gemma-3-4b": 23, + "N300_gemma-3-4b": 38, # TODO Update target } if model_device_key in dict_target_decode_tok_s_u: target_decode_tok_s_u = dict_target_decode_tok_s_u[model_device_key] @@ -1075,6 +1086,7 @@ def test_demo_text( # "T3K_Qwen2.5-Coder-32B": 180, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24754) # "T3K_Qwen2.5-72B": 211, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24754) # "T3K_Qwen3-32B": 250, # too much variability in 
CI (https://github.com/tenstorrent/tt-metal/issues/24754) + "N150_gemma-3-4b": 100, # TODO Update target } ci_target_decode_tok_s_u = { # N150 targets - higher is better @@ -1082,8 +1094,10 @@ def test_demo_text( "N150_Llama-3.2-3B": 35, "N150_Llama-3.1-8B": 21, "N150_Mistral-7B": 23, + "N150_gemma-3-4b": 23, # TODO Update target # N300 targets "N300_Qwen2.5-7B": 20, + "N300_gemma-3-4b": 20, # TODO Update target # T3K targets # "T3K_Llama-3.1-70B": 16, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24303) # "T3K_Qwen2.5-72B": 13, # too much variability in CI (https://github.com/tenstorrent/tt-metal/issues/24303) diff --git a/models/tt_transformers/demo/simple_vision_demo.py b/models/tt_transformers/demo/simple_vision_demo.py index bcc27ce9c474..8c31cddf42c8 100644 --- a/models/tt_transformers/demo/simple_vision_demo.py +++ b/models/tt_transformers/demo/simple_vision_demo.py @@ -480,12 +480,14 @@ def test_multimodal_demo_text( "N300_Llama-3.2-11B": 23.5, "T3K_Llama-3.2-11B": 21.5, "T3K_Llama-3.2-90B": 3, + "N300_gemma-3-4b": 390, }[f"{tt_device_name}_{base_model_name}"] target_decode_tok_s_u = { "N300_Llama-3.2-11B": 21.5, "T3K_Llama-3.2-11B": 37, "T3K_Llama-3.2-90B": 6, + "N300_gemma-3-4b": 24, }[f"{tt_device_name}_{base_model_name}"] target_decode_tok_s = target_decode_tok_s_u * max_batch_size diff --git a/models/tt_transformers/tests/reference_outputs/gemma-3-4b-it.refpt b/models/tt_transformers/tests/reference_outputs/gemma-3-4b-it.refpt new file mode 100644 index 0000000000000000000000000000000000000000..fc22f76e348823f73c3ef1623d2d7612ab648520 GIT binary patch literal 50684 zcmcKD3Aoo|`ake*C}~B>P9zc~Eh;pTT@#5RTdAa??c`)TfLGW?kC&1zQY z)M@C-b={7xYdK=f=$aLyOSA7!nmA_PC~H=;($GeAO}f=I9=c*(I%@LNnkIuPZEI^* z-e+KCP-Ufoji=W%9oTqaqv-b<&Joacr>Lx~BEmIyNiS)~s5`W?-Yb ze@?GiZO^$Mtx%VlUbFhyW9tdFsjX={q-Kq&T?Y?bx#^?NuhVbEI#KKY8(bCAH7k7z zNJk!DJ9<*RsJjm6xfD~_E>AjsTy5?6(e)xs>-WamP3n}Z|F60TZNDW#y;ZmB_Fegp 
zyt>{nmCA(&*Bh!*dGXS3lFDN%MEcGZQQtk1iuTI&o7Xtn&&z(FWxp3PeJNA@wEKIo zt*F}$5DER(*XvnFY+?QvB#t)yJ(vMe7`uM2FA3gkQvi*+h2fwd!eA0W1EN`3b zrmY?A4x1He{j5>{_t-X(vXk<%{U`OIzV5I4>XH5>`O)W?%+F4K6H;Gvm22N7+mUy8 z=E)2HGYZ`9PCn!;aeDKNkZ=8rM=!OGeEW-n!S2ULUc5rDWBt5R_oqMl#?8HbqW-Ew zBHe6Uq_3_P>23WZCI3n3*Yw{n>F=C+D%Tzk>a{mc`FE4F8@cAp55C7QiPSjinz+4{ zI86Oz)SsOFFHAY!%=C*KZ$Din+6_;8H)<1k^1Pb(pPPP5{vD=9yPBj&AN{nGSASH> zGc)_0oq7EKJRs=4-7(Th>DTzc0X=?&Us=9&i=cyF*{&?VC~?F`N>1&&Bpyd5-p}t7 ze6CHo&q+N0mhu0^Bs44?{msPx7l%c^v?ULhT zr4iAezrb6Z+a~>GDV>Z{Yi$($CN7S2^#zfB6znTk6?TLZe8g>Xw>u;{&f6mB^v`)< zpY&_^lY?F8#a>J0PrrLU`+uJCPn?*Y@}W03{bEw`MdzQX&mQZ9TxVvSVPATp^Lmbt z)i#Ru@Ar&U|L3y4?^;p*=k}4#NICaUyG%_Sp1LpUCujW!10t{gO-bkUU88(l+Ka!u znEc4sI_aOVMJT4-r`e7i+b6$kw~Tfpa(y%4q{ySE-_TqSu@k${uZ;JPBrY2y-qt(p z$S-|Nb=XuU%QMA3kuAhYP&n2^a0iLr(I; zMZ4W|9KlIk#HVbJ9(=Sz?~}tqZ|xp!7x<}HJ|UVdUC)aTa5*pQ;bYwaAGlkm>yJKt z)x+u1$3q@+H$Ef+i~Yl!2L|Jb8E??*Q{?HRywr!h#?$9HUORUR`t+m6Wf`yT&v|`e zwkNlCi!&bJtDZm8OZms?Z|X-CdGW982S5F^Lw8Zir=NP|m5eWMWV<$*UoTVn_L;{| zdvvtZ4-P6~DYX%E`5TmUm7*|7;%ZK6^P* zar&*a*N)ll%AKNqcnZ*aP~`DLpWH37U$ac%K>xBmd8FFIsl=zm<%pCQE^z9U{P`_h z&JXb`2b~=M;K%&+v(2M?T+2vn_KQ?Kf1VM=6@2;Kj5VV|Ie*rEo5G*@k$KK}gZ}V> zH(cNiFZjYsec2zsAsHXYjj#5~<;g>ivb}Qhk%t`e+M!GSvY&eG$xSZ(=&QeRgkLp( z;V1OsAN->9pYGR$UByN1yLE_i@$>W+k+%;1DE%n6Hx=W@e5ib@^yAypE~n-E`^C92 zj;}o;=zg$Oq>Xdm{ba`H-e*L)c*$SwqxoUGwhi0ySM{ZT!W;h9Wu<+}JiD`txWF## zr=A_z55HdPg}metC-|Ro{MnP=u)lic^YY=|>xhVh1 zQw{YKM?SqX+MCB4uM_Re@7rgbQg8krn&$#=T)1h_VHf*Y`=YL?=Sw#Pef@rVS@i3a zacqw~7kMD#mwxDcwpX-=JAYeuNt91ay!esw)9_hi;ngW*XR2DmMx zsmatnvDNpZzp*Deo)`sKP8UFF&- zFX>&K&q1_<-^urfyl@?JTgbUyj=xXtkAZqc(?}O=9rYtliu8o^r-yF~Iz97w+i?@4 zJ$|J;aOPL=UGT%`2lqE~eucYzw(*1h!|g(zCVNL(?z^>n|AnBR;#Eqyv@88VJTPt^ zc`o>zcw?mE1A6GNCwug>$~oJ_)+h){Xid($QL{ z9GfMbjW&$>HkmJRv{bt--{00j*4`%#QB{bk#B!$$V>j;9unp7ffqSS`S=OF_z_(A4O}KH4*E^6 z48ENnK;x~6>(3{mIQuy(s zM)PCb8jt@gF7SX4efU9%3q1IjIMgcKYw7tH9N+~HDSq&o5b9OXujW_mI(fZ@xQKJ` z88Iv3(%9UO!bQBokKe47`@)kt1l`Zii1hf;k?xlJ;6>enE_x*$^5awL3vW1Uzbu@M 
z-xAlob03TzJKzgnc$a=y4QKHU-u!Rs4uJ>#;B8$x>y@a7&r!KAKI*!_ZH1hd)K|mL zzJng*FY)82%K4*t>tDGZuI{@n~|l(5HXy%e4FG*(gUxzia2kKro-0 zXT?W&i<5tA9d!8Fqjv^Q@STe-J{w2=V zDfESJ>8I8w;(__bdPIMIYCY*Zj9u|(Pkt*-!;N2>SKtWWms3G{KEFxm=lP=Nr4xEZ z`E!p3Zky&lM87%nqh7wWGrNA3>sRvAhaU8&2mIjm(U!r-`~qk5v;NjE<{9f4eBp(^ zelvo61%4%7J%1bxhHe)rKUp34s-hBo5IDas1oimfKlD~e{h2%+~8<_MW@6IzVL!0e(==48h$z4D!)56^x<#j-*a||a`W;* zEyK^s`L$KzvoLXNwrz|%_@x;t{O#|Zf-XNKKY8gjF`8BA1wXj`=7wmX9bDmso_PVT z@WU5g`07{9|BTzyQ!)6_liu_tpK-Xuse$9Xdm}Y}^#4WVe|Bx8B|UV|MUNh({3UMi zLkEr}A9%rW(XRt%xO7UuGO6GXFYU-v`rpAL!cmN4_)X6FVZHRDm*)iEyV757+%5B) zMymZE!`&+Po)YE8H#^Xe-te0g%`4Vx?!Rr9@#BJ|>wIQ;>A{2k>;RW?p60jaA$AcL z;LR@W&I-ZNd*Qjrm-v_V;V-(Z)Zw@RJ^$ZPjhKL4@bzv0Kh7hU#a zfBwJ^O1r_A{g!1n_ClAP*o_^HEA&gg?1nG9vYYW!+KoM%JRdlT+n?ol6K8H)GwMGL zcq-TDJk}!Zd}f{(HOoBQ;8^0f?YtN$Pvki+{mXuDW&NDa!T;qJk$R4){jFW1TpnL^ z@vmk#_`w&B;-B$W&L8Y|QIp`W|9xi#P;YM%>5%QCepVz);~zdHe|BRp_B(jTATTBM zwQsmDpGW@m=TYx@k$aaV9&mscd}gM6a3e3h$XVi|efz0F04|N^M!rQZX#RO+T z+817Mf*V}*V>fnE&R*nXm;3(|0?l|aQgjzQ67sPZKIG8Oesp>5_FQey<^S^%Pv`yj zJiV<++0yz2cTc4MEN1_ga~?7w=%H|^B_tY4JRofLX^x;*@#W!mS8r-RN`xqf}S zbL4yFeyX%Dd+wFZEg#7E|{8i%lmrJ9*EQd3E;Y~k!!50qnE^#(~ z(Dl3wzAGQqaQsxd2F}jwO1}7^HzCyc)_Ie-XC85mcSWn{ApZO=_p8HONBIW}0{0RZ zIOFSHN+};4N_?u}!aw-mkcUH#Zg&PQk3Sgcq4!2P`fw=y4<6c~Z@p0sm+g-Y-1RT@ z{W|?fJ$&G?EPo*{x%8`s3qNrm<;izKe>hy(JLI*Vo^ej(;ZgEqC-TE(g^oe@trH`i zp9J6qmkv2!T@q|6&cQbw82z+EU)+V0xJ!;%(QWB-Pq^?KxbO?}($eviehUx#iNhB) z98XK;2czZ1#dv`aTqbWFbUNpL_~?UToOm8%KT!$xmDaaKd-}tL-IwJr@L$Jk&5C%DiX-x42m$iq*J7ycv8z{mV$Ui19Ub1%2ZmkjYv6=l&-80};E#`X~wfJ*au4@1;NXd?@lI9rR0jGo$&^eGmQVdtTOm3+LP~(gW_#jtn5-%MZUg zJ<9EiW+dH`AA003aXxUb=(y&u16clO-E2JC_t@`vzJ)G4@Usu%f7+?nUOgPYw`#~U zrd6an<~@RUa=&y^mZO6%dg!1#v%qCm=JjKjW!Zz@!`;4Sw_Ini4|?n&&o1O4e`yc+ zn@7Y2^Cr3Vr}vHnLNWNPnDZUn`t)pwr+y_pxWSRUaH9vk^e^#)7yYbDo;xoV1n_bm zVjlrd_?7&~gD?7JJiTjR=?lBO?ECKCjv-?_C~~am)R~cYhY`hSz^# zsqT**>;uo)DC%eCy6E*MBmc(Ukv@?0)vuK22tzlB`V;nwls(v$e(b<5>XqxK{j^R& zPhNZN^h0lEG_RERTHPnzIrS^&1LriqZy5y2`y1|Ul+T0hyPg;v{Wd*50C(SX_Z)7sj-g|KF 
z7x}^5e10k}{-Y-7xhHbKb&>yg@9?A910x-KN2L6ST}u108#}VwtRTDeIbS(mj7#@G z*bz?7C(YO8x`ux4v*3sSdC4E&^D~dWdE5A`Ju4bqvsvhkzUN1tN5Hdpu^+dOwBNes zS3wV6Nlgn1se%S8@$vzuPpT-e~Baf;3nR~6Mpb=E>zwJF^{5;KfKtH9Q6Q4hYK9wLvA?1!@O5M*K+@(=XuY4hetd z@3s%QTjzQ9@s*%+OFlo&*DNaN+9y5UCHjr6Vqf+gAI&PxW8eWFeg_vgmG-cI;g{wa zc7VHauq4+{=$Cx)Q$Hi+M4vs_f&cUSk6K5EqjMkOeYJTx&wrY@h=Y}!=fwp$^4Dd_ zR}B~P!h@f}13rs>9r8M-a-IhV{NYgA2OoIA2QHqUI$vG@sr|*&-mm&wORZF9&msYe<|%DPKY1k zvV98N*~Pgb`?#L~fBeXSFL~hK|FpoBeb_^s9yuu%n(Q*ORp`ea>;!lC*st(k_e0pj zb1Cup?p)7qyGhVT2hQ+?w{;i&=tWQZ!$UlPGkoFA@8Rei3B5fUhk(x8O1}89E57_4 zzHo=LbH@{M|0OQFHy~bnZUmPe>F44BoNmwe2(?oVH#k;{8}y|QJxct<3;5B`IW0P) zGH$_hdGA}4^BWxD21odf?GSz=Ubv43&xbc_=vQ!CvR{Ef!(ma+PdCeb)~e~R zUnEfYANW$VH!iG?OM9>byNKtdU%`<*_!oP?pI@^Jd%&GtoNu7(-Uq(;7*Fi4J-VeG z*oQsFff*Ek^W8#v#9i#P#)_FyOSk&B$> zb4=$x)~ocU=lF1&rSCJ9_54ITIFXAS@E~_JT;RcP%ro{s?nl@Um~ZT#;D8UlWwGMTkwsSte+9JmvKU{N60B9dyjUGC1ColX;eMigs{tj`&dTDA!JX z>3`aho4oXb8-2=nMXwT1xVcy0dAfPm{Q`XP!ylgb;=^y?Se|E_U*YJS)bmm6pJAf{ zpPf&Q^rT#`crNquv?zz?p6RC#oEHBWlFwnUdnej``9Y-cEpc~0R=IZUGAX(*y^sHR zt|KlR6cw#bi1f(qBQ=kebkOIY=sHJ*w{uzZ4m{15z7OHNwP&tVoY#5p(s}DzPljCH z`*-eIp0he%^}d|(;`b?c(sm+230 zxH%uaW98_lU8g0H=YRUC*B`!wW!f#2qb45S&+(Ut1$MompHw}5we<*Q5m)`hk=YGN0eG<1`kv98t2mn{# zWkE-OIHF5`_+8Mop5k$JrXWUl8y0 zgBQK<+hkGTBW?V7;52)5lyClYO8gEQQh*H1lvLz~zEIq4(|?$l**M_`-qTKA!8D19BZSrfJ~2dftm~ zH#X?&2fxxU`73_#gBShD0YBxXztSIma9eM)kjJ>^zonm&7o8GE{tCzSz6!eY=R^uW z^3tnKjz2g&mhxvR2gN#cEB_prl&t)d=&)(_}b!xMe}S`9yb zteyC7r%uM{NPdg6Teh$ow@f0;m}83 z-q0W6=R1`pj`VAi_mofDJ^H`9TcoW|jC4lk;RiQ3@kcl+r!PKmq%Zv7q&@$$Plq3W zgBu*tEAgA1?{}#`EA@k~ymsC4LFvV>h2HR^4}TL^oFDdiD%wBzaHR13LB^q{FNpFI zKYm!!#Sd=$&OQo`;@rZFOaIt2`0&5Sj)?sFIgT%D5&3Z$r!Q%n!R-3s}h@xkfe{7WicoFC0A)*E-_IpddSM+N%tftUu zBIEfpd9E^khiKRN*+}6G*S0OA9PW*~Mc#NfuILYc{?IgWy*&3#H*|^qE2KYk7##WS zGVgiyA38_5^Y-qSWIoSp=*te*X1Vhc_>)U~gf~0+{Uzlk9rq)eG!OnarJTRtDDnex zoQ})!41fMt`VD!caD*GV$iG4IbuQMYZLC{|l(!S5Y(3idSvp(pN z7=cL@vy}A%k=a86R$RD&redmIqOHiRtZe=u95#>vq=Av{o&Mq=kS-`pBMH2 
zmpEKDFUpU}^Uy)7M!owPU$=-nyP?M(Kj{+n)+_Kde;&I_(4Uy{Xs6%f8Bgck8;VX? zE$A(57U}(&-#O<){P`6>QZ9~I_nPn3Yj1tjH}U;f`iXYp`jU*-^!IyV+L1$iW5$WLX{QZS-ZL^iC)b0w=X^?@`RUih`aMGZ&P_hsAJDM=?!byuX<8waw>|E%Q0^z(Owa@Hg_Zllh%|U)8_LDLngFO=Fzj zb7s^JdoJP;(>{0sgHP|AA0&5_vlOEfgZeG%W-4e z8pq|h-6i8BI_2*T^4nIaKmX)6)-n9mcb@nuIpM^<^ure)dZ5p5<&6XWc~#nDK|U9p zQxo?3%@&dJ(?z>Q{))t(T;w3v?DU5|nU?a69}{#+IhB)xyesASSd{A^xUf^n-#Ers zyAmIKyr1>)F(FqT$5-CpJoB?6)t+2%TzPoZ!_zp2+wj^L7hfjM@GIrEE`uMxRbJu? ze|`Z^dW-MIvEQFHzDK?h2C|PFaa`zeaXv>y7hm$?3s>WD$Bgsj*gNCPiRm}=F+Pn) zbj`=&jd|F*%KVIP(;QFui4Wy|vS+K%Yhlihb8_8!;f_&`ztp%#wxk7k_;9za#zkxa9MzTzB{#WpQj)K9^a1Zs5>0*Fg&s7j*d-{^TJizRI7-`Eu^q zpxZysnTPEW`KNyw>B@ISN}kPgyqP z5hn&Tj`Xq9Laq;T0or%JC@=f7H$3rE4nO7A1?1;X{A;6(xBSX}So>*(9~&?H+(HJ@;j|4~x8gDmvnjeVq7& zu6?6%WF1VtTXP+X4!Y=wZ)IE)$HX`L3;PQDiPv-d+DEuwWq%M429H-=E zALEO>&eh%jaX#eU&h)hdKhM9|&w9}Jk47c#kG>x5?bBAvIMrrwln*~TQgYq6QRLr# zJN*0l=ObMu-vfGfO4QGJGE(;o@i)J{mG3uS)HB){SD&RHo}BAPzX|GZhmR> z`=np+|I=BK*YA;TQQqy`eS&u)tPU{eS$bVh#8%lpV zp>@#v(|(a^H{;C6d;b~T(ofOjuiEiXd49@2&CmRlKe``U`t9B6pVzhsxi-%A;~`H+ ze*cRig`f8`&(8UspPHY<1AeYv|E2z!e6620NgVmVeVX|W9e8Nx_x1Rz-*qzoi3`S8 zNrznUFX>&Ca*EUFSg#s~#--mIz9jW=E?c%!?|zkjrF`UKZ+;<9e$UgKBf;l~c`h?K z^=O^x(%(x@RM&KR3$B$)7!kcRx&f zxEzvkT|eil`uEQD`|i0uYPNpJwM)jkJ^P0~{2pDn@;mg`$@u&J_93tMb>{rYi*w=? 
zf7$XM!RMB>qMiG-{ABaqQC^ep9gAD1w2yN1CXNd_pF~nwck8IfcS62cJR|+A$#c>0 z<87nen(3b%ejfQ>y&Eb1Yb4zj$HYJQJ^j~By!jD7IWFht1t&zmIlm9ON%y29 zqI~--&*qgTX&>*ystG*XcL+d_VH!;_u`!Zs9MrU!I-v7`ORYsUp3%dC=zv zeK(H0xKxw<)sthb9M}BFIfi}mrbmYVIj3|!dE?$e|LL?_pUoowLE^Y^;(P0w4a+|X zauxnA-jjswnME^I^5B5)g{Q0?2e*B!YPVC``ON2|d|`gK{rndq->+wMT$Fb3y$JK=D@pg1q;q|)PuUM2 z@w*)Vr=>rxo$H}>bHDb|Dxv2QX&-)7w%;K8<3n$G{P-WgE$#N&6``Q#)+?o7(nEXp zMW5aIqjt)*uSxva&3b};><8>Cl(SE%hkeE9?qMhU@D&C`{=szvznPn+9g{!($zRef zE`;n`pr$2huSK|Zjih17rZ@zb~G&}8q|DRHBbn)j0&QF}HiW|;7cF5--;>QNL zz*`~tprhUXXNRB%uNQuze)MBeE)PF+>G#Hg(e9(1_kW%H-9Nq`^vo}H_pj-%^u&iheVBGvub=0t_8;0ko4EH+{r;HqRmV3&5BJiJ&VA>@iPOdz zm(e#K@HHP>Z!f)9lJa}bPJZ}YkoB|Ep6J36Kk-d}{#A{R`VZ4DUdZ1;-*b=9(|FiB z{p0yu7yo0EsGpqsxPwxz(*EW81PLZ{ku%3CcZs09=w|6t@2($ZQ{2iMBI9A%ZXe2)EB?89UJP49=-Vox~JwmLC>G=75!(f z9x1w|d?g)rHJ@wm-1o&6LGOr+-`Xkne9L^lPTGxME$3JCZ%Mh?)$<E;_~u`q~*M z_FL?%ojCeY>gzo_addLVRs7)xFXP(0ws-52Yu(ikyl4t>Ynlc;T#`(EeHXprJUP$2r1aR zdzSm%CFQ;w>vyc+V0^Uh8g#yz9`TW1cTJ7^J`m+SlD_pNT+hyV9lvR72ffF(iIm;# zrzRv__Od=St|l&ye%jyLC-NuV7Ab!?C$g1yo5w#+PB|K7oUTcE>APmeALUzTe6jAw zKVLJh*oU2y^Ra&SBp>>_SGij9Q(oHb)-6NMi&8%PUTPiX<~ewi>*}1}W+(kUavuhV z=W;%px<$~rDbK&u&q%x{M0MrO9PgiHzvcBC@}8G;JP-0+tWtmR_LIY+1HT%V;~W2K z{&)SL=yzG_tNn~TFJxEG39VP#mtFb&4;s>4cS+=XW&JDj zBfmugXr21%Q_(=YJ0!o4dez0z{^&?5{P}<72Os@6xg_+VzwhX@+duTaI@fn&GMPhxmv1<9jdWOW)V={T%Be-@m~hJ>RkEnfT61KISFw z)$rG{pYK~M*U$H9s`cx%Q7}5U;OF~IzI$G+zwdOFe4FHYrCPsM83#%|N2Va=gDdi! 
zyc!+f@htiKow!-Agklp?KkZ8U`8^i)b5G}-9ipS(Jz8G7xi2vJ@aTW}$VjW{QQD7x z`(9|Zex?2N^Sx2Ok6x{RX+OVz>-WPiIqbj7UCoZZ%UPb&!k@pF_VYbN^PBI{Rg>R$ zUHPxyfAzb)ewWuex%`fGDUa{P&Iq+D%c5ueZ;$Vdg5c-(XqLCX?^lxN>s*KW z&Z~8SIMRPW2=4bF%HM&3YdL@UE?_x-`L3n$X#eiJmg5sAbj6i&9pe3B-@UJfS2>Rv zzfUcSaYe6c{mc1Gf8Rf;)~}q$PRw=9Gl{qOPw#4Us?BHX=Q_l?WW5bTpU1oWcYiGB zx3^QC#iOEsH95+8%{b~jI{M8{|K&IH=Kgm%%6SdF%7)Poz4Cj{rC(H=$9%`xccH84 zRnAxJ`&@osjGxkLS#~eyG2?5=;Qx+CIe%6A&Ty%JIX^AP-#y|NzwG(n`Bs~M#DCwH zt%hGY@4&B&kL%{X-FHy0%kyF1H=P_}R?0Z?zwTq~``o8?4&t7GeanR8=YFg6KX!Is zn*BW|wvUF#l5qb@+23~FznXN9H~+zRuIVkK)`{IsU#Qpr7>v z9Cq6~_E+V5`rfnm9=&*Io#j41ds&ZpkJb6Sa`!&HN9r73z4tuJe%`P2{v5f-)SEv)^i0F~ z+k0Hsi>@>i!^-Xky%xF>gbK6iB=)B8d6ot*N?yD#sa zJpFyI#yxiT(#g*beK!s~jq{Vw2wX*w-B&J%BkOndjwtgUY*;I zc`W$D`_NoZvlD&X)8&8YzWqqh9h>8hUha4Co5{ZkKJ3swpTDB>!#_0iEB8C`f4^_= zVXse<-~Z)vFL4--?eckGpHV@VT<#&V!+QC=(Zl^ExQ~B4_hx&)h4wk#y*`pp%Hqeo8}H)8y_6Gqh5Ni%%h;kyp#wA=!1-J|<9 zJ-ThvXPdr#`u6D4t4Fs!-FkKF)@_^ay}I}A(YsIIZF+U@-mOoM-t|)cb%Uk@JJm^8 z|KH$&jlS^!H#Hu8ew}{xKdoHsLwEceudSXQY literal 0 HcmV?d00001 diff --git a/models/tt_transformers/tests/test_accuracy.py b/models/tt_transformers/tests/test_accuracy.py index 78de89940988..934f53f67399 100644 --- a/models/tt_transformers/tests/test_accuracy.py +++ b/models/tt_transformers/tests/test_accuracy.py @@ -245,6 +245,19 @@ def test_tt_model_acc( theta=model_args.rope_theta, rope_scaling=model_args.rope_scaling, ) + + if model_args.rope_local_theta is not None: + # If local theta is set, use it to compute the local rope matrices + rot_mats_local = get_rot_mats( + head_dim=model_args.head_dim, + device=mesh_device, + seq_len=prefill_lens[0], + theta=model_args.rope_local_theta, + rope_scaling=None, + ) + else: + rot_mats_local = None + prefill_input = model_args.prepare_residual_tensor_prefill( pt_prefill_input[batch_id], ) @@ -252,7 +265,7 @@ def test_tt_model_acc( tt_out = tt_model( prefill_input, current_pos=None, - rot_mats=rot_mats_prefill, + rot_mats=[rot_mats_prefill, rot_mats_local], 
user_id=batch_id, mode="prefill", page_table=page_table_tt, @@ -280,7 +293,7 @@ def test_tt_model_acc( # Get cos/sin matrices for the current position of each user rot_mats = tt_model.rope_setup.get_rot_mats(current_pos) - + rot_mats_local = None if tt_model.rope_setup_local is None else tt_model.rope_setup_local.get_rot_mats(current_pos) # Print table header if use_reference_file: logger.info(f"{'Progress':<15}{'Correct':<8}{'True':<15}{'Actual':<15}{'Top 5 Predictions':<75}") @@ -310,7 +323,7 @@ def test_tt_model_acc( tt_out = tt_model( decode_input, current_pos_tensor, - rot_mats=rot_mats, + rot_mats=[rot_mats, rot_mats_local], mode="decode", page_table=page_table_tt, ) @@ -351,7 +364,9 @@ def test_tt_model_acc( # Update rot_mats for next iteration current_pos += 1 rot_mats = tt_model.rope_setup.get_rot_mats(current_pos) - + rot_mats_local = ( + tt_model.rope_setup_local.get_rot_mats(current_pos) if tt_model.rope_setup_local is not None else None + ) # Modify the accuracy checking section when using reference text if not use_reference_file: # Get probabilities from model output diff --git a/tests/scripts/single_card/run_single_card_demo_tests.sh b/tests/scripts/single_card/run_single_card_demo_tests.sh index 59f6ee8b82df..3bb997dd46f3 100755 --- a/tests/scripts/single_card/run_single_card_demo_tests.sh +++ b/tests/scripts/single_card/run_single_card_demo_tests.sh @@ -21,6 +21,14 @@ run_qwen7b_func() { } + +run_gemma3_4b_func() { + + HF_MODEL=google/gemma-3-4b-it MESH_DEVICE=N300 pytest -n auto models/tt_transformers/demo/simple_text_demo.py -k performance-ci-1 --timeout 1800 + +} + + run_qwen25_vl_func() { fail=0