IlyasMoutawwakil HF staff committed on
Commit
a1f6c2e
•
1 Parent(s): bd9edb7

update viz

Browse files
Files changed (2) hide show
  1. src/latency_score_memory.py +1 -1
  2. src/llm_perf.py +9 -8
src/latency_score_memory.py CHANGED
@@ -15,7 +15,7 @@ SCORE_MEMORY_LATENCY_DATA = [
15
  "Decode Throughput (tokens/s)",
16
  "Allocated Memory (MB)",
17
  "E2E Latency (s)",
18
- "E2E Throughput (tokens/s)",
19
  ]
20
 
21
 
 
15
  "Decode Throughput (tokens/s)",
16
  "Allocated Memory (MB)",
17
  "E2E Latency (s)",
18
+ # "E2E Throughput (tokens/s)",
19
  ]
20
 
21
 
src/llm_perf.py CHANGED
@@ -12,22 +12,23 @@ COLUMNS_MAPPING = {
12
  "Model": "Model 🤗",
13
  "Arch": "Arch 🏛️",
14
  "Size": "Params (B)",
15
- "Score": "Open LLM Score (%)",
16
- # deployment settings
17
- "backend.name": "Backend 🏭",
18
- "backend.torch_dtype": "DType 📥",
19
- "optimization": "Optimization 🛠️",
20
- "quantization": "Quantization 🗜️",
21
  # primary measurements
22
  "forward.latency(s)": "Prefill Latency (s)",
23
  "decode.throughput(tokens/s)": "Decode Throughput (tokens/s)",
24
  "generate.max_memory_allocated(MB)": "Allocated Memory (MB)",
25
  "generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh)",
 
 
 
 
 
26
  # additional measurements
 
27
  "generate.latency(s)": "E2E Latency (s)",
28
  "generate.throughput(tokens/s)": "E2E Throughput (tokens/s)",
29
- "generate.max_memory_reserved(MB)": "Reserved Memory (MB)",
30
- "generate.max_memory_used(MB)": "Used Memory (MB)",
31
  }
32
  SORTING_COLUMNS = [
33
  "Open LLM Score (%)",
 
12
  "Model": "Model 🤗",
13
  "Arch": "Arch 🏛️",
14
  "Size": "Params (B)",
15
+
 
 
 
 
 
16
  # primary measurements
17
  "forward.latency(s)": "Prefill Latency (s)",
18
  "decode.throughput(tokens/s)": "Decode Throughput (tokens/s)",
19
  "generate.max_memory_allocated(MB)": "Allocated Memory (MB)",
20
  "generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh)",
21
+ # deployment settings
22
+ "backend.name": "Backend 🏭",
23
+ "backend.torch_dtype": "DType 📥",
24
+ "optimization": "Optimization 🛠️",
25
+ "quantization": "Quantization 🗜️",
26
  # additional measurements
27
+ "Score": "Open LLM Score (%)",
28
  "generate.latency(s)": "E2E Latency (s)",
29
  "generate.throughput(tokens/s)": "E2E Throughput (tokens/s)",
30
+ # "generate.max_memory_reserved(MB)": "Reserved Memory (MB)",
31
+ # "generate.max_memory_used(MB)": "Used Memory (MB)",
32
  }
33
  SORTING_COLUMNS = [
34
  "Open LLM Score (%)",