Update README.md
Browse files
README.md
CHANGED
|
@@ -44,7 +44,7 @@ python3 quantize_quark.py --model_dir $MODEL_DIR \
|
|
| 44 |
--skip_evaluation \
|
| 45 |
--multi_device \
|
| 46 |
--model_export hf_format \
|
| 47 |
-
--output_dir amd/DeepSeek-R1-MXFP4
|
| 48 |
```
|
| 49 |
|
| 50 |
# Deployment
|
|
@@ -63,7 +63,7 @@ The model was evaluated using [SGLang](https://docs.sglang.ai/) and [lm-evaluati
|
|
| 63 |
</td>
|
| 64 |
<td><strong>DeepSeek-R1 </strong>
|
| 65 |
</td>
|
| 66 |
-
<td><strong>DeepSeek-R1-MXFP4
|
| 67 |
</td>
|
| 68 |
<td><strong>Recovery</strong>
|
| 69 |
</td>
|
|
@@ -99,7 +99,7 @@ The result of AIME24 was obtained using [SGLang](https://docs.sglang.ai/) while
|
|
| 99 |
```
|
| 100 |
# Launching server
|
| 101 |
python3 -m sglang.launch_server \
|
| 102 |
-
--model amd/DeepSeek-R1-MXFP4
|
| 103 |
--tp 8 \
|
| 104 |
--trust-remote-code \
|
| 105 |
--n-share-experts-fusion 8 \
|
|
@@ -107,7 +107,7 @@ python3 -m sglang.launch_server \
|
|
| 107 |
|
| 108 |
# Evaluating
|
| 109 |
lm_eval --model local-completions \
|
| 110 |
-
--model_args model=amd/DeepSeek-R1-MXFP4
|
| 111 |
--tasks aime24 \
|
| 112 |
--num_fewshot 0 \
|
| 113 |
--gen_kwargs "do_sample=True,temperature=0.6,top_p=0.95,max_tokens=32000" \
|
|
@@ -119,7 +119,7 @@ lm_eval --model local-completions \
|
|
| 119 |
### GSM8K
|
| 120 |
```
|
| 121 |
lm_eval --model local-completions \
|
| 122 |
-
--model_args model=amd/DeepSeek-R1-MXFP4
|
| 123 |
--tasks gsm8k \
|
| 124 |
--num_fewshot 5 \
|
| 125 |
--batch_size auto \
|
|
|
|
| 44 |
--skip_evaluation \
|
| 45 |
--multi_device \
|
| 46 |
--model_export hf_format \
|
| 47 |
+
--output_dir amd/DeepSeek-R1-MXFP4
|
| 48 |
```
|
| 49 |
|
| 50 |
# Deployment
|
|
|
|
| 63 |
</td>
|
| 64 |
<td><strong>DeepSeek-R1 </strong>
|
| 65 |
</td>
|
| 66 |
+
<td><strong>DeepSeek-R1-MXFP4 (this model)</strong>
|
| 67 |
</td>
|
| 68 |
<td><strong>Recovery</strong>
|
| 69 |
</td>
|
|
|
|
| 99 |
```
|
| 100 |
# Launching server
|
| 101 |
python3 -m sglang.launch_server \
|
| 102 |
+
--model amd/DeepSeek-R1-MXFP4 \
|
| 103 |
--tp 8 \
|
| 104 |
--trust-remote-code \
|
| 105 |
--n-share-experts-fusion 8 \
|
|
|
|
| 107 |
|
| 108 |
# Evaluating
|
| 109 |
lm_eval --model local-completions \
|
| 110 |
+
--model_args model=amd/DeepSeek-R1-MXFP4,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=32000,temperature=0.6,top_p=0.95 \
|
| 111 |
--tasks aime24 \
|
| 112 |
--num_fewshot 0 \
|
| 113 |
--gen_kwargs "do_sample=True,temperature=0.6,top_p=0.95,max_tokens=32000" \
|
|
|
|
| 119 |
### GSM8K
|
| 120 |
```
|
| 121 |
lm_eval --model local-completions \
|
| 122 |
+
--model_args model=amd/DeepSeek-R1-MXFP4,base_url=http://localhost:30000/v1/completions,num_concurrent=999999,timeout=999999,tokenized_requests=False,max_length=8096 \
|
| 123 |
--tasks gsm8k \
|
| 124 |
--num_fewshot 5 \
|
| 125 |
--batch_size auto \
|