jerryzh168 committed on
Commit
638f535
·
verified ·
1 Parent(s): 6be1d49

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +3 -3
README.md CHANGED
@@ -255,7 +255,7 @@ lm_eval --model hf --model_args pretrained=$MODEL --tasks mmlu --device cuda:0 -
255
  | Benchmark | | | |
256
  |----------------------------------|------------------------|--------------------------------|---------------------------------|
257
  | | google/gemma-3-12b-it | jerryzh168/gemma-3-12b-it-INT4 | pytorch/gemma-3-12b-it-AWQ-INT4 |
258
- | Peak Memory (GB) | 24.50 | 8.57 (65% reduction) | 12.60 (49% reduction) |
259
 
260
  Note: jerryzh168/gemma-3-12b-it-INT4 is the H100 optimized checkpoint for INT4
261
 
@@ -317,8 +317,8 @@ print(f"Peak Memory Usage: {mem:.02f} GB")
317
  | Benchmark (Latency) | | | |
318
  |----------------------------------|------------------------|--------------------------------|---------------------------------|
319
  | | google/gemma-3-12b-it | jerryzh168/gemma-3-12b-it-INT4 | pytorch/gemma-3-12b-it-AWQ-INT4 |
320
- | latency (batch_size=1) | 3.73s | 2.76 (1.35x speedup) | 2.76s (1.35x speedup) |
321
- | latency (batch_size=256) | 13.63s | 14.32 (0.95x speedup) | 14.30s (0.95x speedup) |
322
 
323
 
324
  Note: jerryzh168/gemma-3-12b-it-INT4 is the H100 optimized checkpoint for INT4
 
255
  | Benchmark | | | |
256
  |----------------------------------|------------------------|--------------------------------|---------------------------------|
257
  | | google/gemma-3-12b-it | jerryzh168/gemma-3-12b-it-INT4 | pytorch/gemma-3-12b-it-AWQ-INT4 |
258
+ | Peak Memory (GB) | 24.51 | 10.37 (58% reduction) | 12.60 (49% reduction) |
259
 
260
  Note: jerryzh168/gemma-3-12b-it-INT4 is the H100 optimized checkpoint for INT4
261
 
 
317
  | Benchmark (Latency) | | | |
318
  |----------------------------------|------------------------|--------------------------------|---------------------------------|
319
  | | google/gemma-3-12b-it | jerryzh168/gemma-3-12b-it-INT4 | pytorch/gemma-3-12b-it-AWQ-INT4 |
320
+ | latency (batch_size=1) | 3.73s | 2.73s (1.37x speedup) | 2.76s (1.35x speedup) |
321
+ | latency (batch_size=256) | 14.07s | 13.81s (1.02x speedup) | 13.93s (1.01x speedup) |
322
 
323
 
324
  Note: jerryzh168/gemma-3-12b-it-INT4 is the H100 optimized checkpoint for INT4