Update README.md

README.md CHANGED
@@ -73,7 +73,7 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 from auto_round import AutoRound
 
-model_name = "Qwen/Qwen3-235B-A22B-
+model_name = "Qwen/Qwen3-235B-A22B-Instruct-2507"
 
 model = AutoModelForCausalLM.from_pretrained(model_name,
                                              device_map="cpu", torch_dtype="auto")
@@ -88,7 +88,7 @@ for n, m in model.named_modules():
         layer_config[n] = {"bits": 8, "group_size": 128}
 
 autoround = AutoRound(model, tokenizer, iters=0, group_size=64, layer_config=layer_config)
-output_dir = "/dataset/Qwen3-235B-A22B-
+output_dir = "/dataset/Qwen3-235B-A22B-Instruct-2507-int4-mixed"
 autoround.quantize_and_save(output_dir)
 
 ## tricky code to handle qkv fusing issue, we will fix it in vllm later
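For readers applying this change, a self-contained sketch of the recipe these hunks edit may help. The hunks only show the `layer_config` assignment and the `AutoRound` call, so the tokenizer load and the module filter that selects which layers stay at 8-bit are assumptions for illustration:

```python
# Minimal sketch of the mixed-bit recipe touched by the hunks above.
# The 8-bit module filter below is an ASSUMPTION; the README diff only
# shows the layer_config assignment, not the selection condition.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "Qwen/Qwen3-235B-A22B-Instruct-2507"
model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="cpu", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Mixed scheme: 4-bit default (group_size=64), selected layers kept at 8-bit.
layer_config = {}
for n, m in model.named_modules():
    if isinstance(m, torch.nn.Linear) and "self_attn" in n:  # assumed filter
        layer_config[n] = {"bits": 8, "group_size": 128}

# iters=0 disables AutoRound's tuning iterations (RTN-style rounding),
# which keeps quantization of a 235B model tractable on CPU.
autoround = AutoRound(model, tokenizer, iters=0, group_size=64,
                      layer_config=layer_config)
output_dir = "/dataset/Qwen3-235B-A22B-Instruct-2507-int4-mixed"
autoround.quantize_and_save(output_dir)
```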
@@ -106,7 +106,6 @@ for i in range(num_hidden_layers):
         extra_config[qkv_name] = {"bits": 8, "group_size": 128}
 with open(config_path, "w") as file:
     json.dump(config, file, indent=2)
-exit()
 
 ```
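The qkv-fusing patch itself is only partially visible in the last hunk. A hedged reconstruction follows, assuming the `extra_config` key lives under `quantization_config` in the saved `config.json` and that vLLM's fused attention module is named `self_attn.qkv_proj`; neither detail is confirmed by the diff:

```python
# Hypothetical reconstruction of the config.json patch from the last hunk.
# Key layout ("extra_config" under "quantization_config") and the fused
# module name are ASSUMPTIONS based on the variable names shown in the diff.
import json
import os

output_dir = "/dataset/Qwen3-235B-A22B-Instruct-2507-int4-mixed"
config_path = os.path.join(output_dir, "config.json")

with open(config_path) as file:
    config = json.load(file)

num_hidden_layers = config["num_hidden_layers"]
extra_config = config["quantization_config"].setdefault("extra_config", {})

# vLLM fuses the q/k/v projections into a single module, so the fused name
# needs its own per-layer override matching the 8-bit attention layers.
for i in range(num_hidden_layers):
    qkv_name = f"model.layers.{i}.self_attn.qkv_proj"
    extra_config[qkv_name] = {"bits": 8, "group_size": 128}

with open(config_path, "w") as file:
    json.dump(config, file, indent=2)
```

Note the removed `exit()`: dropping it lets the README snippet run to completion instead of killing the interpreter right after the config is written.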