wenhuach committed
Commit 76db5e3 · verified · 1 parent: 75bd1a7

Update README.md

Files changed (1): README.md (+2, -3)
README.md CHANGED
@@ -73,7 +73,7 @@ import torch
 from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig
 from auto_round import AutoRound
 
-model_name = "Qwen/Qwen3-235B-A22B-Thinking-2507"
+model_name = "Qwen/Qwen3-235B-A22B-Instruct-2507"
 
 model = AutoModelForCausalLM.from_pretrained(model_name,
                                              device_map="cpu", torch_dtype="auto")
@@ -88,7 +88,7 @@ for n, m in model.named_modules():
         layer_config[n] = {"bits": 8, "group_size": 128}
 
 autoround = AutoRound(model, tokenizer, iters=0, group_size=64, layer_config=layer_config)
-output_dir = "/dataset/Qwen3-235B-A22B-Thinking-2507-int4-mixed"
+output_dir = "/dataset/Qwen3-235B-A22B-Instruct-2507-int4-mixed"
 autoround.quantize_and_save(output_dir)
 
 ## tricky code to handle qkv fusing issue, we will fix it in vllm later
@@ -106,7 +106,6 @@ for i in range(num_hidden_layers):
     extra_config[qkv_name] = {"bits": 8, "group_size": 128}
 with open(config_path, "w") as file:
     json.dump(config, file, indent=2)
-exit()
 
 ```
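
For context, the updated recipe reads roughly as follows when the hunks are assembled. This is a minimal sketch, not the full README: the tokenizer setup and the exact `named_modules()` filter sit in unchanged lines outside these hunks, so those parts are assumptions.

```python
# Minimal sketch of the updated mixed-bit AutoRound recipe after this commit.
# The tokenizer line and the layer filter are assumptions; they live in
# unchanged README lines that this diff does not show.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "Qwen/Qwen3-235B-A22B-Instruct-2507"

model = AutoModelForCausalLM.from_pretrained(model_name,
                                             device_map="cpu", torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)  # assumed; not shown in the diff

# Mixed scheme: INT4 with group_size=64 by default, selected layers INT8 with group_size=128.
layer_config = {}
for n, m in model.named_modules():
    # Placeholder filter: the README's real condition is outside this diff.
    if isinstance(m, torch.nn.Linear) and "experts" not in n:
        layer_config[n] = {"bits": 8, "group_size": 128}

# iters=0 selects RTN-style rounding (no tuning iterations).
autoround = AutoRound(model, tokenizer, iters=0, group_size=64, layer_config=layer_config)
output_dir = "/dataset/Qwen3-235B-A22B-Instruct-2507-int4-mixed"
autoround.quantize_and_save(output_dir)
```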
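The last hunk drops the stray `exit()` from the qkv workaround, of which only fragments are visible. A hedged sketch of what that patch plausibly does: since vLLM fuses the q, k, and v projections into a single `qkv_proj` module, the saved `config.json` needs an explicit entry for the fused name. The `quantization_config["extra_config"]` location and the `model.layers.{i}.self_attn.qkv_proj` naming are assumptions inferred from the visible lines.

```python
# Hedged reconstruction of the qkv-fusing workaround; only the loop body and
# the final json.dump appear in the diff, so the scaffolding here is assumed.
import json
import os

output_dir = "/dataset/Qwen3-235B-A22B-Instruct-2507-int4-mixed"
config_path = os.path.join(output_dir, "config.json")

with open(config_path) as file:
    config = json.load(file)

num_hidden_layers = config["num_hidden_layers"]
# Assumed location for per-layer overrides inside the saved quantization config.
extra_config = config["quantization_config"].setdefault("extra_config", {})

for i in range(num_hidden_layers):
    # Assumed fused-module name as vLLM sees it after qkv fusion.
    qkv_name = f"model.layers.{i}.self_attn.qkv_proj"
    extra_config[qkv_name] = {"bits": 8, "group_size": 128}

with open(config_path, "w") as file:
    json.dump(config, file, indent=2)
```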