Error with AutoAWQ tensor parallel size 4 in vLLM

#3
by fersebas - opened

I get an error when using 4 GPUs after running AutoAWQ on my model after fine-tuning, but this model works. How can I AWQ my model so that it works on 4 GPUs? I checked and the intermediate_size is different. Could you please give me the code to reproduce this AWQ model.

Thanks so much.

same here, x2

Solved by running the code below and then changing intermediate_size to 29696 in config.json. The reason it works: 29568 is divisible by the 128 group size, but split across 4 GPUs each shard is 7392, which is not; padding to 29696 gives shards of 7424 = 58 × 128. See:
https://github.com/QwenLM/Qwen2/issues/578

import json
import os
from collections import OrderedDict
from typing import Dict

import torch
from safetensors import safe_open
from safetensors.torch import save_file
from tqdm import tqdm
from transformers.modeling_utils import (
SAFE_WEIGHTS_INDEX_NAME,
SAFE_WEIGHTS_NAME,
WEIGHTS_INDEX_NAME,
WEIGHTS_NAME,
shard_checkpoint,
)

def save_weight(input_dir: str, output_dir: str, shard_size: str, save_safetensors: bool) -> str:
    # Load every safetensors shard of the original checkpoint into one state dict.
    qwen_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    for filepath in tqdm(os.listdir(input_dir), desc="Load weights"):
        if os.path.isfile(os.path.join(input_dir, filepath)) and filepath.endswith(".safetensors"):
            with safe_open(os.path.join(input_dir, filepath), framework="pt", device="cpu") as f:
                for key in f.keys():
                    qwen_state_dict[key] = f.get_tensor(key)

    # Pad every 2-D weight whose intermediate dimension is 29568 with 128 zero
    # rows/columns, growing it to 29696.
    qwen2_state_dict: Dict[str, torch.Tensor] = OrderedDict()
    torch_dtype = None
    for key, value in tqdm(qwen_state_dict.items(), desc="Convert format"):
        if torch_dtype is None:
            torch_dtype = value.dtype
        shape_list = [int(i) for i in value.shape]
        if len(shape_list) == 2:
            if shape_list[0] == 29568:  # e.g. gate_proj / up_proj: pad the output dim
                value = torch.cat((value, torch.zeros([128, shape_list[1]], dtype=value.dtype)), dim=0)
            if shape_list[1] == 29568:  # e.g. down_proj: pad the input dim
                value = torch.cat((value, torch.zeros([shape_list[0], 128], dtype=value.dtype)), dim=1)
        qwen2_state_dict[key] = value

    # Re-shard the padded state dict and write it out.
    weights_name = SAFE_WEIGHTS_NAME if save_safetensors else WEIGHTS_NAME
    shards, index = shard_checkpoint(qwen2_state_dict, max_shard_size=shard_size, weights_name=weights_name)

    for shard_file, shard in tqdm(shards.items(), desc="Save weights"):
        if save_safetensors:
            save_file(shard, os.path.join(output_dir, shard_file), metadata={"format": "pt"})
        else:
            torch.save(shard, os.path.join(output_dir, shard_file))

    if index is None:
        print("Model weights saved in {}".format(os.path.join(output_dir, weights_name)))
    else:
        index_name = SAFE_WEIGHTS_INDEX_NAME if save_safetensors else WEIGHTS_INDEX_NAME
        with open(os.path.join(output_dir, index_name), "w", encoding="utf-8") as f:
            json.dump(index, f, indent=2, sort_keys=True)
        print("Model weights saved in {}".format(output_dir))

    return str(torch_dtype).replace("torch.", "")
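
For reference, a minimal driver for the function above; the paths are placeholders and the "5GB" shard size just mirrors the usual transformers default. Note that shard_checkpoint comes from older transformers releases (recent versions moved checkpoint sharding into huggingface_hub), so run this with a version that still exports it:

if __name__ == "__main__":
    # Hypothetical paths: point these at your fine-tuned checkpoint and an
    # empty output directory.
    input_dir = "/path/to/finetuned-qwen2-72b"
    output_dir = "/path/to/qwen2-72b-padded"
    os.makedirs(output_dir, exist_ok=True)
    dtype = save_weight(input_dir, output_dir, shard_size="5GB", save_safetensors=True)
    print("Padded weights saved with dtype:", dtype)

The function only writes the weight shards and the index, so remember to also copy config.json and the tokenizer files from input_dir before the next step.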

@prudant try it and tell me if you have any problem. We finally got it working on 4 GPUs. You have to pad the model with that code, then change intermediate_size to 29696 in config.json, and finally run AWQ with group size 128. A sketch of those last two steps is below.
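
In case it helps, here is a minimal sketch of those two steps, following the standard AutoAWQ recipe. The paths are placeholders, and the quant_config values beyond q_group_size (4-bit, zero point, GEMM kernel) are assumptions based on the usual AutoAWQ defaults, not the exact script used for this repo:

import json
import os

from awq import AutoAWQForCausalLM
from transformers import AutoTokenizer

model_path = "/path/to/qwen2-72b-padded"  # output of save_weight above (hypothetical path)
quant_path = "/path/to/qwen2-72b-awq"

# Step 1: bump intermediate_size in config.json to match the padded tensors.
config_file = os.path.join(model_path, "config.json")
with open(config_file, "r", encoding="utf-8") as f:
    config = json.load(f)
config["intermediate_size"] = 29696
with open(config_file, "w", encoding="utf-8") as f:
    json.dump(config, f, indent=2)

# Step 2: quantize with AWQ using group size 128.
quant_config = {"zero_point": True, "q_group_size": 128, "w_bit": 4, "version": "GEMM"}
model = AutoAWQForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(model_path)
model.quantize(tokenizer, quant_config=quant_config)
model.save_quantized(quant_path)
tokenizer.save_pretrained(quant_path)

After this, the quantized model should load in vLLM with tensor_parallel_size=4, since each shard of the padded intermediate dimension (29696 / 4 = 7424) is a multiple of the 128 group size.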

thanks! will try! regards
