Update README.md
README.md CHANGED
@@ -33,7 +33,6 @@ You can either perform the dequantization manually using this [conversion script
 **Quantization scripts:**
 ```
 cd Quark/examples/torch/language_modeling/llm_ptq/
-
 python3 quantize_quark.py --model_dir $MODEL_DIR \
 --quant_scheme w_mxfp4_a_mxfp4 \
 --group_size 32 \
@@ -106,6 +105,7 @@ The results were obtained using the following commands:
 
 #### AIME2024
 ```
+# starting server
 python3 -m sglang.launch_server \
 --model amd/DeepSeek-R1-MXFP4 \
 --tp 8 \
@@ -113,6 +113,7 @@ python3 -m sglang.launch_server \
 --n-share-experts-fusion 8 \
 --disable-radix-cache
 
+# evaluating
 lm_eval --model local-completions \
 --model_args model=amd/DeepSeek-R1-MXFP4,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
 --tasks aime24 \