amd
/

DeepSeek-R1-MXFP4

8-bit precision

Model card · Files and versions

yuzhouuu committed 29 days ago

Commit

126d8f7

·

verified ·

1 Parent(s): 7f771af

Update README.md

Files changed (1) hide show

README.md +5 -5

README.md CHANGED Viewed

@@ -44,7 +44,7 @@ python3 quantize_quark.py --model_dir $MODEL_DIR \
                           --skip_evaluation \
                           --multi_device  \
                           --model_export hf_format \
-                          --output_dir amd/DeepSeek-R1-MXFP4-Preview
 ```
 # Deployment
@@ -63,7 +63,7 @@ The model was evaluated using [SGLang](https://docs.sglang.ai/) and [lm-evaluati
    </td>
    <td><strong>DeepSeek-R1 </strong>
    </td>
-   <td><strong>DeepSeek-R1-MXFP4-Preview(this model)</strong>
    </td>
    <td><strong>Recovery</strong>
    </td>
@@ -99,7 +99,7 @@ The result of AIME24 was obtained using [SGLang](https://docs.sglang.ai/) while
 ```
 # Launching server
 python3 -m sglang.launch_server \
-    --model amd/DeepSeek-R1-MXFP4-Preview \
     --tp 8  \
     --trust-remote-code  \
     --n-share-experts-fusion 8 \
@@ -107,7 +107,7 @@ python3 -m sglang.launch_server \
 # Evaluating
 lm_eval --model local-completions \
-    --model_args model=amd/DeepSeek-R1-MXFP4-Preview,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
     --tasks aime24 \
     --num_fewshot 0 \
     --gen_kwargs "do_sample=True,temperature=0.6,top_p=0.95,max_tokens=32000" \
@@ -119,7 +119,7 @@ lm_eval --model local-completions \
 ### GSM8K
 ```
 lm_eval --model local-completions \
-    --model_args model=amd/DeepSeek-R1-MXFP4-Preview,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=8096 \
     --tasks gsm8k \
     --num_fewshot 5 \
     --batch_size auto \

                           --skip_evaluation \
                           --multi_device  \
                           --model_export hf_format \
+                          --output_dir amd/DeepSeek-R1-MXFP4
 ```
 # Deployment
    </td>
    <td><strong>DeepSeek-R1 </strong>
    </td>
+   <td><strong>DeepSeek-R1-MXFP4(this model)</strong>
    </td>
    <td><strong>Recovery</strong>
    </td>
 ```
 # Launching server
 python3 -m sglang.launch_server \
+    --model amd/DeepSeek-R1-MXFP4 \
     --tp 8  \
     --trust-remote-code  \
     --n-share-experts-fusion 8 \
 # Evaluating
 lm_eval --model local-completions \
+    --model_args model=amd/DeepSeek-R1-MXFP4,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
     --tasks aime24 \
     --num_fewshot 0 \
     --gen_kwargs "do_sample=True,temperature=0.6,top_p=0.95,max_tokens=32000" \
 ### GSM8K
 ```
 lm_eval --model local-completions \
+    --model_args model=amd/DeepSeek-R1-MXFP4,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=8096 \
     --tasks gsm8k \
     --num_fewshot 5 \
     --batch_size auto \