Update README.md
README.md
@@ -255,7 +255,7 @@ lm_eval --model hf --model_args pretrained=$MODEL --tasks mmlu --device cuda:0 -
 | Benchmark | | | |
 |----------------------------------|------------------------|--------------------------------|---------------------------------|
 | | google/gemma-3-12b-it | jerryzh168/gemma-3-12b-it-INT4 | pytorch/gemma-3-12b-it-AWQ-INT4 |
-| Peak Memory (GB) | 24.50 | 8.57 (65% reduction) | 12.
+| Peak Memory (GB) | 24.50 | 8.57 (65% reduction) | 12.60 (49% reduction) |
 
 Note: jerryzh168/gemma-3-12b-it-INT4 is the H100 optimized checkpoint for INT4
 
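For context, a minimal sketch of how peak-memory figures like the "Peak Memory (GB)" row could be reproduced. The README does not show the measurement code; the helper name, workload, and loading snippet below are assumptions, using only standard `torch.cuda` memory APIs.

```python
import torch

# Hypothetical helper (not from the README): runs any inference workload and
# reports the peak CUDA memory allocated during it, in GB.
def measure_peak_memory_gb(run_workload, device="cuda:0"):
    torch.cuda.empty_cache()
    torch.cuda.reset_peak_memory_stats(device)
    run_workload()
    torch.cuda.synchronize(device)
    return torch.cuda.max_memory_allocated(device) / 1024**3

# Example usage (assumes the checkpoint loads via transformers on a single GPU):
#   model = AutoModelForCausalLM.from_pretrained("pytorch/gemma-3-12b-it-AWQ-INT4", device_map="cuda:0")
#   peak = measure_peak_memory_gb(lambda: model.generate(**inputs, max_new_tokens=32))
```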