Skip to content

Commit 8789a73

Merge branch 'ggml-org:master' into jinja-tester
2 parents: dcbcb7f + c1c354e


54 files changed: +1559, -432 lines

common/arg.cpp

Lines changed: 1 addition & 1 deletion

@@ -2466,7 +2466,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_N_CPU_MOE_DRAFT"));
     add_opt(common_arg(
         {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
-        "number of layers to store in VRAM",
+        string_format("max. number of layers to store in VRAM (default: %d)", params.n_gpu_layers),
         [](common_params & params, int value) {
             params.n_gpu_layers = value;
             if (!llama_supports_gpu_offload()) {
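The change above bakes the current default into the option's help text, so `--help` output stays accurate if the default ever changes. The same pattern in Python's argparse, as a minimal sketch (the default value here is a placeholder for illustration, not llama.cpp's real default):

```python
import argparse

# Embed the current default in the help string so it is shown by --help,
# analogous to the string_format() call in the C++ change above.
DEFAULT_N_GPU_LAYERS = -1  # placeholder default, for illustration only

parser = argparse.ArgumentParser()
parser.add_argument(
    "-ngl", "--gpu-layers", "--n-gpu-layers",
    dest="n_gpu_layers", type=int, default=DEFAULT_N_GPU_LAYERS, metavar="N",
    help=f"max. number of layers to store in VRAM (default: {DEFAULT_N_GPU_LAYERS})",
)

args = parser.parse_args(["-ngl", "33"])
```

Formatting the default once, at parser-construction time, keeps the help text and the actual default from drifting apart.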

docs/backend/CANN.md

Lines changed: 5 additions & 9 deletions
@@ -293,17 +293,14 @@ We would like to thank Tuo Dai, Shanni Li, and all of the project maintainers fr
 
 ## Environment variable setup
 
-### GGML_CANN_ASYNC_MODE
-
-Enables asynchronous operator submission. Disabled by default.
-
 ### GGML_CANN_MEM_POOL
 
-Specifies the memory pool management strategy:
+Specifies the memory pool management strategy. Default is vmm.
 
 - vmm: Utilizes a virtual memory manager pool. If hardware support for VMM is unavailable, falls back to the legacy (leg) memory pool.
 
 - prio: Employs a priority queue-based memory pool management.
+
 - leg: Uses a fixed-size buffer pool.
 
 ### GGML_CANN_DISABLE_BUF_POOL_CLEAN
@@ -312,9 +309,8 @@ Controls automatic cleanup of the memory pool. This option is only effective whe
 
 ### GGML_CANN_WEIGHT_NZ
 
-Converting the matmul weight format from ND to NZ can significantly improve performance on the 310I DUO NPU.
+Converts the matmul weight format from ND to NZ to improve performance. Enabled by default.
 
-### GGML_CANN_DISABLE_ACL_GRAPH
+### GGML_CANN_ACL_GRAPH
 
-When this variable is set, ACL graph execution is disabled and operators are executed in an op-by-op (eager) mode.
-This mode is mainly intended for debugging or for cases where the overhead of graph construction and execution is not desirable.
+Operators are executed using ACL graph execution, rather than in op-by-op (eager) mode. Enabled by default.
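These variables are read by the CANN backend from the process environment, so they must be set before the library is loaded. A minimal sketch of selecting the pool strategy from Python; the helper function is hypothetical, only the variable name and its vmm/prio/leg values come from the docs above:

```python
import os

# Documented strategies for GGML_CANN_MEM_POOL (vmm is the default).
VALID_POOLS = {"vmm", "prio", "leg"}

def select_cann_mem_pool(strategy: str = "vmm") -> str:
    """Export GGML_CANN_MEM_POOL, rejecting values the docs do not list.

    Illustrative helper, not part of llama.cpp; set the variable before
    the CANN backend is initialized for it to take effect.
    """
    if strategy not in VALID_POOLS:
        raise ValueError(f"unknown pool strategy: {strategy!r}")
    os.environ["GGML_CANN_MEM_POOL"] = strategy
    return os.environ["GGML_CANN_MEM_POOL"]
```

Validating up front turns a silently ignored typo into an immediate error.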

examples/model-conversion/Makefile

Lines changed: 1 addition & 1 deletion
@@ -63,7 +63,7 @@ causal-verify-logits: causal-run-original-model causal-run-converted-model
 	@MODEL_PATH="$(MODEL_PATH)" ./scripts/utils/check-nmse.py -m ${MODEL_PATH}
 
 causal-run-original-embeddings:
-	@./scripts/causal/run-casual-gen-embeddings-org.sh
+	@./scripts/causal/run-casual-gen-embeddings-org.py
 
 causal-run-converted-embeddings:
 	@./scripts/causal/run-converted-model-embeddings-logits.sh

examples/model-conversion/scripts/causal/compare-embeddings-logits.sh

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-#/bin/bash
+#!/usr/bin/env bash
 
 set -e
 
examples/model-conversion/scripts/causal/convert-model.sh

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 set -e
 

examples/model-conversion/scripts/causal/run-casual-gen-embeddings-org.sh renamed to examples/model-conversion/scripts/causal/run-casual-gen-embeddings-org.py

Lines changed: 3 additions & 2 deletions
@@ -3,11 +3,10 @@
 import argparse
 import os
 import importlib
-import sys
 import torch
 import numpy as np
 
-from transformers import AutoTokenizer, AutoConfig, AutoModel, AutoModelForCausalLM
+from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
 from pathlib import Path
 
 unreleased_model_name = os.getenv('UNRELEASED_MODEL_NAME')
@@ -43,6 +42,8 @@
         model = model_class.from_pretrained(model_path)
     except (ImportError, AttributeError) as e:
         print(f"Failed to import or load model: {e}")
+        print("Falling back to AutoModelForCausalLM")
+        model = AutoModelForCausalLM.from_pretrained(model_path)
 else:
     model = AutoModelForCausalLM.from_pretrained(model_path)
 print(f"Model class: {type(model)}")
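The second hunk above adds a graceful fallback when the model-specific class cannot be imported or loaded. The same try/except pattern in isolation, with hypothetical stand-in loaders rather than the script's real transformers calls:

```python
# Sketch of the fallback the diff adds: try a model-specific loader first,
# and on ImportError/AttributeError fall back to a generic one.
# Both loader functions are illustrative stand-ins.
def load_specific(path):
    raise ImportError("unreleased model class not available")

def load_generic(path):
    return f"generic model from {path}"

def load_model(path):
    try:
        model = load_specific(path)
    except (ImportError, AttributeError) as e:
        print(f"Failed to import or load model: {e}")
        print("Falling back to generic loader")
        model = load_generic(path)
    return model
```

Catching only ImportError and AttributeError keeps genuine runtime failures (bad weights, out-of-memory) loud instead of silently masked by the fallback.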

examples/model-conversion/scripts/causal/run-converted-model-embeddings-logits.sh

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 set -e
 
examples/model-conversion/scripts/causal/run-converted-model.sh

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 set -e
 
examples/model-conversion/scripts/embedding/compare-embeddings-logits.sh

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-#/bin/bash
+#!/usr/bin/env bash
 
 set -e
 
examples/model-conversion/scripts/embedding/convert-model.sh

Lines changed: 1 addition & 1 deletion
@@ -1,4 +1,4 @@
-#!/bin/bash
+#!/usr/bin/env bash
 
 set -e
 
0 commit comments