linzhao-amd committed on
Commit
f595047
·
verified ·
1 Parent(s): 985c484

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +2 -1
README.md CHANGED
@@ -33,7 +33,6 @@ You can either perform the dequantization manually using this [conversion script
33
  **Quantization scripts:**
34
  ```
35
  cd Quark/examples/torch/language_modeling/llm_ptq/
36
-
37
  python3 quantize_quark.py --model_dir $MODEL_DIR \
38
  --quant_scheme w_mxfp4_a_mxfp4 \
39
  --group_size 32 \
@@ -106,6 +105,7 @@ The results were obtained using the following commands:
106
 
107
  #### AIME2024
108
  ```
 
109
  python3 -m sglang.launch_server \
110
  --model amd/DeepSeek-R1-MXFP4 \
111
  --tp 8 \
@@ -113,6 +113,7 @@ python3 -m sglang.launch_server \
113
  --n-share-experts-fusion 8 \
114
  --disable-radix-cache
115
 
 
116
  lm_eval --model local-completions \
117
  --model_args model=amd/DeepSeek-R1-MXFP4,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
118
  --tasks aime24 \
 
33
  **Quantization scripts:**
34
  ```
35
  cd Quark/examples/torch/language_modeling/llm_ptq/
 
36
  python3 quantize_quark.py --model_dir $MODEL_DIR \
37
  --quant_scheme w_mxfp4_a_mxfp4 \
38
  --group_size 32 \
 
105
 
106
  #### AIME2024
107
  ```
108
+ # Start the SGLang server
109
  python3 -m sglang.launch_server \
110
  --model amd/DeepSeek-R1-MXFP4 \
111
  --tp 8 \
 
113
  --n-share-experts-fusion 8 \
114
  --disable-radix-cache
115
 
116
+ # Run the AIME24 evaluation with lm_eval against the local server
117
  lm_eval --model local-completions \
118
  --model_args model=amd/DeepSeek-R1-MXFP4,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
119
  --tasks aime24 \