Spaces:

optimum
/

llm-perf-leaderboard

Running

IlyasMoutawwakil HF staff committed on Jan 4

Commit

bb5689a

•

1 Parent(s): a1135a9

fix decode throughput

Files changed (2) hide show

src/latency_score_memory.py CHANGED Viewed

@@ -42,7 +42,7 @@ def get_lat_score_mem_fig(llm_perf_df):
             "xanchor": "center",
             "yanchor": "top",
         },
-        xaxis_title="Per 1000 Tokens Latency (s)",
         yaxis_title="Open LLM Score (%)",
         legend_title="LLM Architecture",
         width=1200,

             "xanchor": "center",
             "yanchor": "top",
         },
+        xaxis_title="Per 256 Tokens Latency (s)",
         yaxis_title="Open LLM Score (%)",
         legend_title="LLM Architecture",
         width=1200,

src/llm_perf.py CHANGED Viewed

@@ -101,10 +101,6 @@ def get_llm_perf_df(machine: str = "hf-dgx-01"):
     ].apply(lambda x: process_quantization_scheme(x), axis=1)
     # add arch
     llm_perf_df["Arch"] = llm_perf_df["Arch"].apply(process_arch)
-    # add decode throughput
-    llm_perf_df["decode.throughput(tokens/s)"] = (
-        1000 / (llm_perf_df["generate.latency(s)"] - llm_perf_df["forward.latency(s)"])
-    ).round(2)
     # filter columns
     llm_perf_df = llm_perf_df[list(COLUMNS_MAPPING.keys())]
     # rename columns

     ].apply(lambda x: process_quantization_scheme(x), axis=1)
     # add arch
     llm_perf_df["Arch"] = llm_perf_df["Arch"].apply(process_arch)
     # filter columns
     llm_perf_df = llm_perf_df[list(COLUMNS_MAPPING.keys())]
     # rename columns