IlyasMoutawwakil HF staff committed on
Commit
bb5689a
•
1 Parent(s): a1135a9

fix decode throughput

Browse files
Files changed (2) hide show
  1. src/latency_score_memory.py +1 -1
  2. src/llm_perf.py +0 -4
src/latency_score_memory.py CHANGED
@@ -42,7 +42,7 @@ def get_lat_score_mem_fig(llm_perf_df):
42
  "xanchor": "center",
43
  "yanchor": "top",
44
  },
45
- xaxis_title="Per 1000 Tokens Latency (s)",
46
  yaxis_title="Open LLM Score (%)",
47
  legend_title="LLM Architecture",
48
  width=1200,
 
42
  "xanchor": "center",
43
  "yanchor": "top",
44
  },
45
+ xaxis_title="Per 256 Tokens Latency (s)",
46
  yaxis_title="Open LLM Score (%)",
47
  legend_title="LLM Architecture",
48
  width=1200,
src/llm_perf.py CHANGED
@@ -101,10 +101,6 @@ def get_llm_perf_df(machine: str = "hf-dgx-01"):
101
  ].apply(lambda x: process_quantization_scheme(x), axis=1)
102
  # add arch
103
  llm_perf_df["Arch"] = llm_perf_df["Arch"].apply(process_arch)
104
- # add decode throughput
105
- llm_perf_df["decode.throughput(tokens/s)"] = (
106
- 1000 / (llm_perf_df["generate.latency(s)"] - llm_perf_df["forward.latency(s)"])
107
- ).round(2)
108
  # filter columns
109
  llm_perf_df = llm_perf_df[list(COLUMNS_MAPPING.keys())]
110
  # rename columns
 
101
  ].apply(lambda x: process_quantization_scheme(x), axis=1)
102
  # add arch
103
  llm_perf_df["Arch"] = llm_perf_df["Arch"].apply(process_arch)
 
 
 
 
104
  # filter columns
105
  llm_perf_df = llm_perf_df[list(COLUMNS_MAPPING.keys())]
106
  # rename columns