fix: llama arch implementation (#17665)

giladgd · web-flow · commit 00c361fe53e5 · 2025-12-01T21:21:13.000+01:00
diff --git a/src/llama-model.cpp b/src/llama-model.cpp
@@ -626,6 +626,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {
     switch (arch) {
         case LLM_ARCH_LLAMA:
             {
+                ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);
+
                 if (hparams.n_expert == 8) {
                     switch (hparams.n_layer) {
                         case 32: type = LLM_TYPE_8x7B; break;

| Original file line number | Diff line number | Diff line change |
| --- | --- | --- |
| | | `@@ -626,6 +626,8 @@ void llama_model::load_hparams(llama_model_loader & ml) {` |
| `626` | `626` | `switch (arch) {` |
| `627` | `627` | `case LLM_ARCH_LLAMA:` |
| `628` | `628` | `{` |
| | `629` | `+ ml.get_key(LLM_KV_ATTENTION_LAYERNORM_RMS_EPS, hparams.f_norm_rms_eps);` |
| | `630` | `+` |
| `629` | `631` | `if (hparams.n_expert == 8) {` |
| `630` | `632` | `switch (hparams.n_layer) {` |
| `631` | `633` | `case 32: type = LLM_TYPE_8x7B; break;` |