BenchmarkBot committed on
Commit
4cfc121
1 Parent(s): d3abea5

updated plot

Browse files
Files changed (2) hide show
  1. app.py +8 -8
  2. src/utils.py +3 -3
app.py CHANGED
@@ -1,7 +1,7 @@
1
- import plotly.express as px
2
  import os
3
  import gradio as gr
4
  import pandas as pd
 
5
  from apscheduler.schedulers.background import BackgroundScheduler
6
 
7
  from src.assets.text_content import TITLE, INTRODUCTION_TEXT, SINGLE_A100_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
@@ -19,7 +19,7 @@ COLUMNS_MAPPING = {
19
  "backend.torch_dtype": "Datatype 📥",
20
  "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
21
  "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
22
- "h4_score": "H4 Score ⬆️",
23
  }
24
  COLUMNS_DATATYPES = ["markdown", "str", "str", "number", "number", "markdown"]
25
  SORTING_COLUMN = ["Throughput (tokens/s) ⬆️"]
@@ -66,7 +66,7 @@ def get_benchmark_plot(benchmark):
66
  scores_df = pd.read_csv(
67
  f"./llm-perf-dataset/reports/additional_data.csv")
68
  bench_df = bench_df.merge(scores_df, on="model", how="left")
69
-
70
  bench_df = bench_df[bench_df["generate.latency(s)"] < 100]
71
 
72
  fig = px.scatter(
@@ -85,11 +85,11 @@ def get_benchmark_plot(benchmark):
85
  },
86
  xaxis_title="Average H4 Score",
87
  yaxis_title="Latency per 1000 Tokens (s)",
88
- legend_title="Model Type",
89
  width=1200,
90
  height=600,
91
  )
92
-
93
  fig.update_traces(
94
  hovertemplate="<br>".join([
95
  "Model: %{customdata[0]}",
@@ -174,7 +174,7 @@ with demo:
174
  max_rows=None,
175
  visible=False,
176
  )
177
-
178
  submit_button.click(
179
  submit_query,
180
  [
@@ -187,14 +187,14 @@ with demo:
187
  with gr.TabItem("🖥️ A100-80GB Plot 📊", id=1):
188
  # Original leaderboard plot
189
  gr.HTML(SINGLE_A100_TEXT)
190
-
191
  # Original leaderboard plot
192
  single_A100_plotly = gr.components.Plot(
193
  value=single_A100_plot,
194
  elem_id="1xA100-plot",
195
  show_label=False,
196
  )
197
-
198
  with gr.Row():
199
  with gr.Accordion("📙 Citation", open=False):
200
  citation_button = gr.Textbox(
 
 
1
  import os
2
  import gradio as gr
3
  import pandas as pd
4
+ import plotly.express as px
5
  from apscheduler.schedulers.background import BackgroundScheduler
6
 
7
  from src.assets.text_content import TITLE, INTRODUCTION_TEXT, SINGLE_A100_TEXT, CITATION_BUTTON_LABEL, CITATION_BUTTON_TEXT
 
19
  "backend.torch_dtype": "Datatype 📥",
20
  "forward.peak_memory(MB)": "Peak Memory (MB) ⬇️",
21
  "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
22
+ "h4_score": "Average H4 Score ⬆️",
23
  }
24
  COLUMNS_DATATYPES = ["markdown", "str", "str", "number", "number", "markdown"]
25
  SORTING_COLUMN = ["Throughput (tokens/s) ⬆️"]
 
66
  scores_df = pd.read_csv(
67
  f"./llm-perf-dataset/reports/additional_data.csv")
68
  bench_df = bench_df.merge(scores_df, on="model", how="left")
69
+
70
  bench_df = bench_df[bench_df["generate.latency(s)"] < 100]
71
 
72
  fig = px.scatter(
 
85
  },
86
  xaxis_title="Average H4 Score",
87
  yaxis_title="Latency per 1000 Tokens (s)",
88
+ legend_title="Model Type, Backend",
89
  width=1200,
90
  height=600,
91
  )
92
+
93
  fig.update_traces(
94
  hovertemplate="<br>".join([
95
  "Model: %{customdata[0]}",
 
174
  max_rows=None,
175
  visible=False,
176
  )
177
+
178
  submit_button.click(
179
  submit_query,
180
  [
 
187
  with gr.TabItem("🖥️ A100-80GB Plot 📊", id=1):
188
  # Original leaderboard plot
189
  gr.HTML(SINGLE_A100_TEXT)
190
+
191
  # Original leaderboard plot
192
  single_A100_plotly = gr.components.Plot(
193
  value=single_A100_plot,
194
  elem_id="1xA100-plot",
195
  show_label=False,
196
  )
197
+
198
  with gr.Row():
199
  with gr.Accordion("📙 Citation", open=False):
200
  citation_button = gr.Textbox(
src/utils.py CHANGED
@@ -73,16 +73,16 @@ def extract_score_from_clickable(clickable_score) -> float:
73
 
74
 
75
  def submit_query(text, backends, datatypes, threshold, raw_df):
76
- raw_df["H4 Score ⬆️"] = raw_df["H4 Score ⬆️"].apply(
77
  extract_score_from_clickable)
78
 
79
  filtered_df = raw_df[
80
  raw_df["Model 🤗"].str.lower().str.contains(text.lower()) &
81
  raw_df["Backend 🏭"].isin(backends) &
82
  raw_df["Datatype 📥"].isin(datatypes) &
83
- (raw_df["H4 Score ⬆️"] >= threshold)
84
  ]
85
 
86
- filtered_df["H4 Score ⬆️"] = filtered_df["H4 Score ⬆️"].apply(
87
  make_clickable_score)
88
  return filtered_df
 
73
 
74
 
75
  def submit_query(text, backends, datatypes, threshold, raw_df):
76
+ raw_df["Average H4 Score ⬆️"] = raw_df["Average H4 Score ⬆️"].apply(
77
  extract_score_from_clickable)
78
 
79
  filtered_df = raw_df[
80
  raw_df["Model 🤗"].str.lower().str.contains(text.lower()) &
81
  raw_df["Backend 🏭"].isin(backends) &
82
  raw_df["Datatype 📥"].isin(datatypes) &
83
+ (raw_df["Average H4 Score ⬆️"] >= threshold)
84
  ]
85
 
86
+ filtered_df["Average H4 Score ⬆️"] = filtered_df["Average H4 Score ⬆️"].apply(
87
  make_clickable_score)
88
  return filtered_df