This repository was archived by the owner on May 11, 2025. It is now read-only.

TypeError: Qwen2Attention.forward() missing 1 required positional argument: 'attention_mask' #749

@Charimanhua

Description

I'm running into a problem when quantizing my fine-tuned Qwen2.5-14B-Instruct model.
I followed the instructions here: https://qwen.readthedocs.io/en/latest/quantization/awq.html

My code is:

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

# Specify paths and hyperparameters for quantization
model_path = "/mnt/HDD/Hhr/model/Qwen2.5-14B-Instruct-lora-sft-merged"
quant_path = "/mnt/HDD/Hhr/model/Qwen2.5-14B-Instruct-lora-sft-merged-awq"
quant_config = { "zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM" }

# Load your tokenizer and model with AutoAWQ
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoAWQForCausalLM.from_pretrained(model_path, device_map="auto", safetensors=True)

import json

# Load the qwen2.5-14B-instruct-svg-awq.json file
input_path = "/home/user1809/HuahaiRan/LLaMA-Factory/data/qwen2.5-14B-instruct-svg-awq.json"
with open(input_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)
    
data = []
for msg in dataset:
    # The msg structure presumably needs adjusting to fit apply_chat_template
    # For simplicity, take the user content of each conversation; adjust as needed
    user_content = next((item["content"] for item in msg if item["role"] == "user"), "")
    text = tokenizer.apply_chat_template(user_content, tokenize=False, add_generation_prompt=False)
    data.append(text.strip())
    
model.quantize(tokenizer, quant_config=quant_config, calib_data=data)

model.save_quantized(quant_path, safetensors=True, shard_size="4GB")
tokenizer.save_pretrained(quant_path)
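
A note on the calibration loop above: as far as I understand, apply_chat_template expects the conversation as a list of {"role", "content"} dicts rather than a bare string, so passing user_content alone may not render the template as intended. A minimal sketch of the conventional usage, reusing the variables from the script (an assumption about intent, not a confirmed fix for the error below):

# Sketch: render each full conversation through the chat template
# (assumes dataset is a list of conversations, each a list of
# {"role", "content"} messages, as in the sample shown below)
data = []
for conversation in dataset:
    text = tokenizer.apply_chat_template(
        conversation,  # the whole message list, not a single string
        tokenize=False,
        add_generation_prompt=False,
    )
    data.append(text.strip())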

My fine-tuning dataset is as follows (one conversation from the file):

[
    {
      "role": "system",
      "content": "You are a svg code generater, generate SVG code to visually represent the following text description"
    },
    {
      "role": "user",
      "content": "Generate SVG code to visually represent the following text description, while respecting the given constraints.\n\n<constraints>\n\n* **Allowed Elements:** `svg`, `path`, `circle`, `rect`, `ellipse`, `line`, `polyline`, `polygon`, `g`, `linearGradient`, `radialGradient`, `stop`, `defs`\n\n* **Allowed Attributes:** `viewBox`, `width`, `height`, `fill`, `stroke`, `stroke-width`, `d`, `cx`, `cy`, `r`, `x`, `y`, `rx`, `ry`, `x1`, `y1`, `x2`, `y2`, `points`, `transform`, `opacity`\n\n</constraints>\n\n<example>\n\n<description>\"A red circle with a blue square inside\"</description>\n\n```svg\n\n<svg viewBox=\"0 0 256 256\" width=\"256\" height=\"256\">\n<circle cx=\"50\" cy=\"50\" r=\"40\" fill=\"red\"/>\n<rect x=\"30\" y=\"30\" width=\"40\" height=\"40\" fill=\"blue\"/>\n</svg>\n</example>\nPlease ensure that the generated SVG code is well-formed, valid, and strictly adheres to these constraints. Focus on a clear and concise representation of the input description within the given limitations. Always give the complete SVG code with nothing omitted. Never use an ellipsis.\n\n<description>\"An indigo hexagon bloom glistens with morning dew.\"</description>\n\nsvg\n<svg viewBox=\"0 0 256 256\" width=\"256\" height=\"256\">"
    },
    {
      "role": "assistant",
      "content": "<svg xmlns=\"http://www.w3.org/2000/svg\" clip-rule=\"evenodd\" image-rendering=\"optimizeQuality\" shape-rendering=\"geometricPrecision\" text-rendering=\"geometricPrecision\" viewBox=\"0 0 96 96\"><path fill=\"#fcfcfe\" d=\"M0 0h96v96H0z\"/><path fill=\"#26396d\" d=\"m47 23 19 13 4 18q-9 6-15 14h-4l-4 3q-6-3-8-8h-3q-6-5-9-11 0-9 3-17z\"/><path fill=\"#949db5\" d=\"M48 43q2 0 1 2z\"/><path fill=\"#d7dde6\" d=\"M49 60q2 0 1 3-2-1-1-3\"/></svg>"
    }
  ],
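
The trailing comma above indicates this is a single conversation taken from a larger top-level list. A quick sanity check of that assumed nesting (reusing input_path from the script above):

import json

with open(input_path, 'r', encoding='utf-8') as f:
    dataset = json.load(f)

# Expect a list of conversations, each a list of {"role", "content"} dicts
assert isinstance(dataset, list)
assert all(isinstance(conv, list) for conv in dataset)
assert all({"role", "content"} <= set(msg) for conv in dataset for msg in conv)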

The error is as follows:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[5], line 1
----> 1 model.quantize(tokenizer, quant_config=quant_config, calib_data=data)

File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    113 @functools.wraps(func)
    114 def decorate_context(*args, **kwargs):
    115     with ctx_factory():
--> 116         return func(*args, **kwargs)

File ~/HuahaiRan/AutoAWQ/awq/models/base.py:242, in BaseAWQForCausalLM.quantize(self, tokenizer, quant_config, calib_data, split, text_column, duo_scaling, export_compatible, apply_clip, n_parallel_calib_samples, max_calib_samples, max_calib_seq_len, max_chunk_memory, quantizer_cls, **kwargs)
    219 self.quant_config.modules_to_not_convert = self.modules_to_not_convert
    221 self.quantizer = quantizer_cls(
    222     self,
    223     self.model,
    (...)
    240     **kwargs,
    241 )
--> 242 self.quantizer.quantize()
    244 self.is_quantized = True

File ~/HuahaiRan/AutoAWQ/awq/quantize/quantizer.py:189, in AwqQuantizer.quantize(self)
    185 # [STEP 2]: Compute and apply scale list
    186 module_config: List[Dict] = self.awq_model.get_layers_for_scaling(
    187     self.modules[i], input_feat, self.module_kwargs
    188 )
--> 189 scales_list = [
    190     self._search_best_scale(self.modules[i], **layer)
    191     for layer in module_config
    192 ]
    193 apply_scale(self.modules[i], scales_list, input_feat_dict=input_feat)
    194 scales_list = append_str_prefix(
    195     scales_list, get_op_name(self.model, self.modules[i]) + "."
    196 )

File ~/HuahaiRan/AutoAWQ/awq/quantize/quantizer.py:190, in <listcomp>(.0)
    185 # [STEP 2]: Compute and apply scale list
    186 module_config: List[Dict] = self.awq_model.get_layers_for_scaling(
    187     self.modules[i], input_feat, self.module_kwargs
    188 )
    189 scales_list = [
--> 190     self._search_best_scale(self.modules[i], **layer)
    191     for layer in module_config
    192 ]
    193 apply_scale(self.modules[i], scales_list, input_feat_dict=input_feat)
    194 scales_list = append_str_prefix(
    195     scales_list, get_op_name(self.model, self.modules[i]) + "."
    196 )

File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    113 @functools.wraps(func)
    114 def decorate_context(*args, **kwargs):
    115     with ctx_factory():
--> 116         return func(*args, **kwargs)

File ~/HuahaiRan/AutoAWQ/awq/quantize/quantizer.py:350, in AwqQuantizer._search_best_scale(self, module, prev_op, layers, inp, module2inspect, kwargs)
    348 with torch.no_grad():
    349     module_kwargs = self._sanitize_kwargs(kwargs, module2inspect)
--> 350     fp16_output = self._module_forward(inp, module2inspect, module_kwargs)
    351 fp16_output = fp16_output.clip(torch.finfo(fp16_output.dtype).min, torch.finfo(fp16_output.dtype).max)
    353 # [STEP 4]: Compute loss

File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/utils/_contextlib.py:116, in context_decorator.<locals>.decorate_context(*args, **kwargs)
    113 @functools.wraps(func)
    114 def decorate_context(*args, **kwargs):
    115     with ctx_factory():
--> 116         return func(*args, **kwargs)

File ~/HuahaiRan/AutoAWQ/awq/quantize/quantizer.py:270, in AwqQuantizer._module_forward(self, x, module, module_kwargs)
    264 @torch.no_grad()
    265 def _module_forward(
    266     self, x: torch.Tensor, module: torch.nn.Module, module_kwargs: Dict
    267 ) -> torch.Tensor:
    268     if self.n_parallel_calib_samples is None:
    269         # runs through all samples at once
--> 270         module_output = module(x, **module_kwargs)
    271         if isinstance(module_output, tuple):
    272             module_output = module_output[0]

File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/nn/modules/module.py:1739, in Module._wrapped_call_impl(self, *args, **kwargs)
   1737     return self._compiled_call_impl(*args, **kwargs)  # type: ignore[misc]
   1738 else:
-> 1739     return self._call_impl(*args, **kwargs)

File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/torch/nn/modules/module.py:1750, in Module._call_impl(self, *args, **kwargs)
   1745 # If we don't have any hooks, we want to skip the rest of the logic in
   1746 # this function, and just call forward.
   1747 if not (self._backward_hooks or self._backward_pre_hooks or self._forward_hooks or self._forward_pre_hooks
   1748         or _global_backward_pre_hooks or _global_backward_hooks
   1749         or _global_forward_hooks or _global_forward_pre_hooks):
-> 1750     return forward_call(*args, **kwargs)
   1752 result = None
   1753 called_always_called_hooks = set()

File ~/anaconda3/envs/llama-factory/lib/python3.10/site-packages/accelerate/hooks.py:176, in add_hook_to_module.<locals>.new_forward(module, *args, **kwargs)
    174     output = module._old_forward(*args, **kwargs)
    175 else:
--> 176     output = module._old_forward(*args, **kwargs)
    177 return module._hf_hook.post_forward(module, output)

TypeError: Qwen2Attention.forward() missing 1 required positional argument: 'attention_mask'
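
For what it's worth, the failure happens where AutoAWQ replays a decoder layer as module(x, **module_kwargs): the kwargs captured during calibration apparently lack attention_mask, while the installed Qwen2Attention.forward() requires it as a positional argument. A quick way to inspect the signature in your own environment:

import inspect
from transformers.models.qwen2.modeling_qwen2 import Qwen2Attention

# Shows whether attention_mask is required positionally in this version
print(inspect.signature(Qwen2Attention.forward))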

How can I fix it? Thanks.
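
For anyone hitting the same thing: the transformers v4.48 attention refactor reportedly made attention_mask a required positional argument of Qwen2Attention.forward(), which older AutoAWQ builds do not pass. Pinning transformers below 4.48 may be the simplest route; an alternative, purely hypothetical stopgap is to restore a default for the argument before loading the model (unverified, and it may change masking semantics during calibration):

# Hypothetical monkey-patch, applied BEFORE AutoAWQForCausalLM.from_pretrained()
# so that accelerate's hooks capture the patched forward. Not a verified fix.
from transformers.models.qwen2 import modeling_qwen2

_orig_forward = modeling_qwen2.Qwen2Attention.forward

def _forward_with_optional_mask(self, hidden_states, position_embeddings,
                                attention_mask=None, **kwargs):
    # Delegate to the original forward, defaulting the missing mask to None
    return _orig_forward(self, hidden_states, position_embeddings,
                         attention_mask, **kwargs)

modeling_qwen2.Qwen2Attention.forward = _forward_with_optional_mask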
