This repository was archived by the owner on May 11, 2025. It is now read-only.
TypeError: Qwen2Attention.forward() missing 1 required positional argument: 'attention_mask' #749
Description
I ran into a problem when quantizing my fine-tuned Qwen2.5-14B-Instruct model.
I followed the instructions here: https://qwen.readthedocs.io/en/latest/quantization/awq.html
My code is:
from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer
# Specify paths and hyperparameters for quantization
model_path = "/mnt/HDD/Hhr/model/Qwen2.5-14B-Instruct-lora-sft-merged"
quant_path = "/mnt/HDD/Hhr/model/Qwen2.5-14B-Instruct-lora-sft-merged-awq"
quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }
# Load your tokenizer and model with AutoAWQ
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoAWQForCausalLM.from_pretrained(model_path, device_map="auto", safetensors=True)
import json
# Load the qwen2.5-14B-instruct-svg-awq.json file
input_path = "/home/user1809/HuahaiRan/LLaMA-Factory/data/qwen2.5-14B-instruct-svg-awq.json"
with open(input_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)
data = []
for msg in dataset:
    # The msg structure here presumably needs adjusting to fit apply_chat_template;
    # as a simple example this takes the user content from each conversation,
    # and can be modified to suit the actual data
    user_content = next((item["content"] for item in msg if item["role"] == "user"), "")
    text = tokenizer.apply_chat_template(user_content, tokenize=False, add_generation_prompt=False)
    data.append(text.strip())
model.quantize(tokenizer, quant_config=quant_config, calib_data=data)
model.save_quantized(quant_path, safetensors=True, shard_size="4GB")
tokenizer.save_pretrained(quant_path)
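(A side note on the calibration loop above: as far as I know, transformers' apply_chat_template expects the full list of {"role", "content"} message dicts rather than a single content string. A minimal sketch of that usual pattern, reusing the tokenizer and dataset from above and assuming each dataset entry is one conversation, as in the sample below:)

data = []
for conversation in dataset:
    # Pass the whole conversation (a list of role/content dicts) to the chat
    # template instead of only the extracted user string.
    text = tokenizer.apply_chat_template(conversation, tokenize=False, add_generation_prompt=False)
    data.append(text.strip())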
My fine-tuning dataset is as follows:
[
{
"role": "system",
"content": "You are a svg code generater, generate SVG code to visually represent the following text description"
},
{
"role": "user",
"content": "Generate SVG code to visually represent the following text description, while respecting the given constraints.\n\n<constraints>\n\n* **Allowed Elements:** `svg`, `path`, `circle`, `rect`, `ellipse`, `line`, `polyline`, `polygon`, `g`, `linearGradient`, `radialGradient`, `stop`, `defs`\n\n* **Allowed Attributes:** `viewBox`, `width`, `height`, `fill`, `stroke`, `stroke-width`, `d`, `cx`, `cy`, `r`, `x`, `y`, `rx`, `ry`, `x1`, `y1`, `x2`, `y2`, `points`, `transform`, `opacity`\n\n</constraints>\n\n<example>\n\n<description>\"A red circle with a blue square inside\"</description>\n\n```svg\n\n<svg viewBox=\"0 0 256 256\" width=\"256\" height=\"256\">\n<circle cx=\"50\" cy=\"50\" r=\"40\" fill=\"red\"/>\n<rect x=\"30\" y=\"30\" width=\"40\" height=\"40\" fill=\"blue\"/>\n</svg>\n</example>\nPlease ensure that the generated SVG code is well-formed, valid, and strictly adheres to these constraints. Focus on a clear and concise representation of the input description within the given limitations. Always give the complete SVG code with nothing omitted. Never use an ellipsis.\n\n<description>\"An indigo hexagon bloom glistens with morning dew.\"</description>\n\nsvg\n<svg viewBox=\"0 0 256 256\" width=\"256\" height=\"256\">"
},
{
"role": "assistant",
"content": "<svg xmlns=\"http://www.w3.org/2000/svg\" clip-rule=\"evenodd\" image-rendering=\"optimizeQuality\" shape-rendering=\"geometricPrecision\" text-rendering=\"geometricPrecision\" viewBox=\"0 0 96 96\"><path fill=\"#fcfcfe\" d=\"M0 0h96v96H0z\"/><path fill=\"#26396d\" d=\"m47 23 19 13 4 18q-9 6-15 14h-4l-4 3q-6-3-8-8h-3q-6-5-9-11 0-9 3-17z\"/><path fill=\"#949db5\" d=\"M48 43q2 0 1 2z\"/><path fill=\"#d7dde6\" d=\"M49 60q2 0 1 3-2-1-1-3\"/></svg>"
}
],
The error is as follows:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[5], line 1
----> 1 model.quantize(tokenizer, quant_config=quant_config, calib_data=data)
File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/HuahaiRan/AutoAWQ/awq/models/base.py:242, in BaseAWQForCausalLM.quantize(self, tokenizer, quant_config, calib_data, split, text_column, duo_scaling, export_compatible, apply_clip, n_parallel_calib_samples, max_calib_samples, max_calib_seq_len, max_chunk_memory, quantizer_cls, **kwargs)
219 self.quant_config.modules_to_not_convert = self.modules_to_not_convert
221 self.quantizer = quantizer_cls(
222 self,
223 self.model,
(...)
240 **kwargs,
241 )
--> 242 self.quantizer.quantize()
244 self.is_quantized = True
File ~/HuahaiRan/AutoAWQ/awq/quantize/quantizer.py:189, in AwqQuantizer.quantize(self)
185 # [STEP 2]: Compute and apply scale list
186 module_config: List[Dict] = self.awq_model.get_layers_for_scaling(
187 self.modules[i], input_feat, self.module_kwargs
188 )
--> 189 scales_list = [
190 self._search_best_scale(self.modules[i], **layer)
191 for layer in module_config
192 ]
193 apply_scale(self.modules[i], scales_list, input_feat_dict=input_feat)
194 scales_list = append_str_prefix(
195 scales_list, get_op_name(self.model, self.modules[i]) + "."
196 )
File ~/HuahaiRan/AutoAWQ/awq/quantize/quantizer.py:190, in <listcomp>(.0)
185 # [STEP 2]: Compute and apply scale list
186 module_config: List[Dict] = self.awq_model.get_layers_for_scaling(
187 self.modules[i], input_feat, self.module_kwargs
188 )
189 scales_list = [
--> 190 self._search_best_scale(self.modules[i], **layer)
191 for layer in module_config
192 ]
193 apply_scale(self.modules[i], scales_list, input_feat_dict=input_feat)
194 scales_list = append_str_prefix(
195 scales_list, get_op_name(self.model, self.modules[i]) + "."
196 )
File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/HuahaiRan/AutoAWQ/awq/quantize/quantizer.py:350, in AwqQuantizer._search_best_scale(self, module, prev_op, layers, inp, module2inspect, kwargs)
348 with torch.no_grad():
349 module_kwargs = self._sanitize_kwargs(kwargs, module2inspect)
--> 350 fp16_output = self._module_forward(inp, module2inspect, module_kwargs)
351 fp16_output = fp16_output.clip(torch.finfo(fp16_output.dtype).min, torch.finfo(fp16_output.dtype).max)
353 # [STEP 4]: Compute loss
File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
113 @functools.wraps(func)
114 def decorate_context(*args, **kwargs):
115 with ctx_factory():
--> 116 return func(*args, **kwargs)
File ~/HuahaiRan/AutoAWQ/awq/quantize/quantizer.py:270, in AwqQuantizer._module_forward(self, x, module, module_kwargs)
264 @torch.no_grad()
265 def _module_forward(
266 self, x: torch.Tensor, module: torch.nn.Module, module_kwargs: Dict
267 ) -> torch.Tensor:
268 if self.n_parallel_calib_samples is None:
269 # runs through all samples at once
--> 270 module_output = module(x, **module_kwargs)
271 if isinstance(module_output, tuple):
272 module_output = module_output[0]
File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
1737 return self._compiled_call_impl(*args, **kwargs) # type: ignore[misc]
1738 else:
-> 1739 return self._call_impl(*args, **kwargs)
File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
1745 # If we don't have any hooks, we want to skip the rest of the logic in
1746 # this function, and just call forward.
1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
1748 or _global_backward_pre_hooks or _global_backward_hooks
1749 or _global_forward_hooks or _global_forward_pre_hooks):
-> 1750 return forward_call(*args, **kwargs)
1752 result = None
1753 called_always_called_hooks = set()
File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/accelerate/hooks.py:176, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
174 output = module._old_forward(*args, **kwargs)
175 else:
--> 176 output = module._old_forward(*args, **kwargs)
177 return module._hf_hook.post_forward(module, output)
TypeError: Qwen2Attention.forward() missing 1 required positional argument: 'attention_mask'
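From the traceback, the failure is the module(x, **module_kwargs) call in AutoAWQ's _module_forward, where module_kwargs apparently does not carry an attention_mask. If I understand correctly, recent transformers releases refactored the Qwen2 attention so that attention_mask became a required positional argument of Qwen2Attention.forward, so this looks like a version mismatch between transformers and this AutoAWQ build. A possible workaround (my assumption, not a confirmed fix) would be pinning an older transformers release before re-running the script:

import transformers
# Check the installed version; releases from the ~4.48 attention refactor onward
# changed the Qwen2Attention.forward signature (assumption based on the traceback).
print(transformers.__version__)
# Possible workaround (assumption, not a confirmed fix): downgrade with
#     pip install "transformers==4.47.1"
# and then re-run the quantization script above.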
How can I fix it? Thanks.