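Misc. fixes: register LLM_TENSOR_SSM_BETA_ALPHA in LLM_TENSOR_INFOS; use an unsigned literal (8192u) in llama_context::graph_max_nodes().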

pwilkin · pwilkin · commit bb5e624d96c5 · 2025-11-20T13:42:15.000+01:00
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
@@ -2546,6 +2546,7 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
     {LLM_TENSOR_SSM_X,                      {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_DT,                     {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_SSM_OUT,                    {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
+    {LLM_TENSOR_SSM_BETA_ALPHA,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_W1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_W2,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
     {LLM_TENSOR_TIME_MIX_A1,                {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
diff --git a/src/llama-context.cpp b/src/llama-context.cpp
@@ -1388,7 +1388,7 @@ void llama_context::output_reorder() {
 
 uint32_t llama_context::graph_max_nodes() const {
     if (model.arch == LLM_ARCH_QWEN3NEXT) {
-        return std::max<uint32_t>(8192, 32u*model.n_tensors());
+        return std::max<uint32_t>(8192u, 32u*model.n_tensors());
     }
     return std::max<uint32_t>(1024u, 8u*model.n_tensors());
 }

Original file line number	Diff line number	Diff line change
`@@ -1388,7 +1388,7 @@ void llama_context::output_reorder() {`
`1388`	`1388`
`1389`	`1389`	`uint32_t llama_context::graph_max_nodes() const {`
`1390`	`1390`	`if (model.arch == LLM_ARCH_QWEN3NEXT) {`
`1391`		`- return std::max<uint32_t>(8192, 32u*model.n_tensors());`
	`1391`	`+ return std::max<uint32_t>(8192u, 32u*model.n_tensors());`
`1392`	`1392`	`}`
`1393`	`1393`	`return std::max<uint32_t>(1024u, 8u*model.n_tensors());`
`1394`	`1394`	`}`