BenchmarkBot committed
Commit d262fb3 • 1 Parent(s): d8b9ce2

made models clickable

Files changed (3):
  1. app.py +28 -45
  2. src/assets/text_content.py +2 -0
  3. src/utils.py +62 -0
app.py CHANGED
@@ -1,69 +1,48 @@
 import os
 import gradio as gr
 import pandas as pd
-from huggingface_hub import HfApi, Repository
 from apscheduler.schedulers.background import BackgroundScheduler
 
 from src.assets.text_content import TITLE, INTRODUCTION_TEXT
 from src.assets.css_html_js import custom_css, get_window_url_params
+from src.utils import restart_space, load_dataset_repo, make_clickable_model
 
-OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 
 LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
-LLM_PERF_DATASET_REPO = "optimum/llm-perf"
+LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
+OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN")
 
+llm_perf_dataset_repo = load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN)
 
-def restart_space():
-    HfApi().restart_space(
-        repo_id=LLM_PERF_LEADERBOARD_REPO, token=OPTIMUM_TOKEN
-    )
 
-
-def load_dataset_repo():
-    llm_perf_repo = None
-    if OPTIMUM_TOKEN:
-        print("Loading LLM-Perf-Dataset from Hub...")
-        llm_perf_repo = Repository(
-            local_dir="./llm-perf/",
-            clone_from=LLM_PERF_DATASET_REPO,
-            token=OPTIMUM_TOKEN,
-            repo_type="dataset",
-        )
-        llm_perf_repo.git_pull()
-
-    return llm_perf_repo
-
-
-def get_leaderboard_df():
-    if llm_perf_repo:
-        llm_perf_repo.git_pull()
+def get_vanilla_benchmark_df():
+    if llm_perf_dataset_repo:
+        llm_perf_dataset_repo.git_pull()
 
-    df = pd.read_csv("./llm-perf/reports/cuda_1_100/inference_report.csv")
+    df = pd.read_csv(
+        "./llm-perf-dataset/reports/cuda_1_100/inference_report.csv")
 
     df = df[["model", "backend.name", "backend.torch_dtype", "backend.quantization",
              "generate.latency(s)", "generate.throughput(tokens/s)"]]
 
+    df["model"] = df["model"].apply(make_clickable_model)
+
     df.rename(columns={
         "model": "Model",
-        "backend.name": "Backend",
-        "backend.torch_dtype": "Torch dtype",
-        "backend.quantization": "Quantization",
-        "generate.latency(s)": "Latency (s)",
-        "generate.throughput(tokens/s)": "Throughput (tokens/s)"
+        "backend.name": "Backend 🏭",
+        "backend.torch_dtype": "Load dtype",
+        "backend.quantization": "Quantization 🗜️",
+        "generate.latency(s)": "Latency (s) ⬇️",
+        "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
    }, inplace=True)
 
-    df.sort_values(by=["Throughput (tokens/s)"], ascending=False, inplace=True)
+    df.sort_values(by=["Throughput (tokens/s) ⬆️"],
+                   ascending=False, inplace=True)
 
     return df
 
 
-def refresh():
-    leaderboard_df = get_leaderboard_df()
-
-    return leaderboard_df
-
-
-llm_perf_repo = load_dataset_repo()
-
+# Define demo interface
 demo = gr.Blocks(css=custom_css)
 with demo:
     gr.HTML(TITLE)
@@ -71,15 +50,19 @@ with demo:
 
     with gr.Tabs(elem_classes="tab-buttons") as tabs:
         with gr.TabItem("Vanilla Benchmark", elem_id="vanilla-benchmark", id=0):
-            leaderboard_df = get_leaderboard_df()
+
+            vanilla_benchmark_df = get_vanilla_benchmark_df()
             leaderboard_table_lite = gr.components.Dataframe(
-                value=leaderboard_df,
-                headers=leaderboard_df.columns.tolist(),
-                max_rows=None,
-                elem_id="leaderboard-table-lite",
+                value=vanilla_benchmark_df,
+                headers=vanilla_benchmark_df.columns.tolist(),
+                elem_id="vanilla-benchmark",
             )
 
+
+# Restart space every hour
 scheduler = BackgroundScheduler()
 scheduler.add_job(restart_space, "interval", seconds=3600)
 scheduler.start()
+
+# Launch demo
 demo.queue(concurrency_count=40).launch()
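One wiring detail worth noting: after this refactor, restart_space takes the leaderboard repo id and token as parameters (see src/utils.py below), while scheduler.add_job(restart_space, ...) still schedules it without arguments. A minimal sketch of how the call could be bound using APScheduler's args parameter; the constants mirror the module-level names in app.py, and this wrapper is an illustration rather than part of the commit:

import os
from apscheduler.schedulers.background import BackgroundScheduler
from src.utils import restart_space

LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN")

scheduler = BackgroundScheduler()
# Bind the arguments restart_space now expects; APScheduler passes `args`
# positionally to the job function on every trigger.
scheduler.add_job(restart_space, "interval", seconds=3600,
                  args=[LLM_PERF_LEADERBOARD_REPO, OPTIMUM_TOKEN])
scheduler.start()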
src/assets/text_content.py CHANGED
@@ -2,4 +2,6 @@ TITLE = """<h1 align="center" id="space-title">🤗 Open LLM-Perf Leaderboard</h
 
 INTRODUCTION_TEXT = f"""
 The 🤗 Open LLM-Perf Leaderboard aims to benchmark the performance (latency & throughput) of Large Language Models (LLMs) on different backends and hardware using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark)
+
+🤗 Anyone from the community can submit a model for automated benchmarking on the 🤗 GPU cluster, as long as it is a 🤗 Transformers model with weights on the Hub. We also support benchmarks of models with delta-weights for non-commercially licensed models, such as LLaMa.
 """
src/utils.py ADDED
@@ -0,0 +1,62 @@
+from huggingface_hub import HfApi, Repository
+
+
+def restart_space(LLM_PERF_LEADERBOARD_REPO, OPTIMUM_TOKEN):
+    HfApi().restart_space(
+        repo_id=LLM_PERF_LEADERBOARD_REPO, token=OPTIMUM_TOKEN
+    )
+
+
+def load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN):
+    llm_perf_repo = None
+    if OPTIMUM_TOKEN:
+        print("Loading LLM-Perf-Dataset from Hub...")
+        llm_perf_repo = Repository(
+            local_dir="./llm-perf/",
+            clone_from=LLM_PERF_DATASET_REPO,
+            token=OPTIMUM_TOKEN,
+            repo_type="dataset",
+        )
+        llm_perf_repo.git_pull()
+
+    return llm_perf_repo
+
+
+LLAMAS = ["huggingface/llama-7b", "huggingface/llama-13b",
+          "huggingface/llama-30b", "huggingface/llama-65b"]
+KOALA_LINK = "https://huggingface.co/TheBloke/koala-13B-HF"
+VICUNA_LINK = "https://huggingface.co/lmsys/vicuna-13b-delta-v1.1"
+OASST_LINK = "https://huggingface.co/OpenAssistant/oasst-sft-4-pythia-12b-epoch-3.5"
+DOLLY_LINK = "https://huggingface.co/databricks/dolly-v2-12b"
+MODEL_PAGE = "https://huggingface.co/models"
+LLAMA_LINK = "https://ai.facebook.com/blog/large-language-model-llama-meta-ai/"
+VICUNA_LINK = "https://huggingface.co/CarperAI/stable-vicuna-13b-delta"
+ALPACA_LINK = "https://crfm.stanford.edu/2023/03/13/alpaca.html"
+
+
+def model_hyperlink(link, model_name):
+    return f'<a target="_blank" href="{link}" style="color: var(--link-text-color); text-decoration: underline;text-decoration-style: dotted;">{model_name}</a>'
+
+
+def make_clickable_model(model_name):
+    link = f"https://huggingface.co/{model_name}"
+
+    if model_name in LLAMAS:
+        link = LLAMA_LINK
+        model_name = model_name.split("/")[1]
+    elif model_name == "HuggingFaceH4/stable-vicuna-13b-2904":
+        link = VICUNA_LINK
+        model_name = "stable-vicuna-13b"
+    elif model_name == "HuggingFaceH4/llama-7b-ift-alpaca":
+        link = ALPACA_LINK
+        model_name = "alpaca-13b"
+    if model_name == "dolly-12b":
+        link = DOLLY_LINK
+    elif model_name == "vicuna-13b":
+        link = VICUNA_LINK
+    elif model_name == "koala-13b":
+        link = KOALA_LINK
+    elif model_name == "oasst-12b":
+        link = OASST_LINK
+
+    return model_hyperlink(link, model_name)
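A quick illustration of what make_clickable_model returns; the model ids are example inputs. Note also that VICUNA_LINK is assigned twice in the committed file, so the CarperAI URL is the one that takes effect:

from src.utils import make_clickable_model

# A regular Hub id links straight to its model page:
print(make_clickable_model("databricks/dolly-v2-12b"))
# <a target="_blank" href="https://huggingface.co/databricks/dolly-v2-12b" ...>databricks/dolly-v2-12b</a>

# A LLaMa checkpoint is special-cased to Meta's announcement post,
# and the org prefix is stripped from the display name:
print(make_clickable_model("huggingface/llama-7b"))
# <a target="_blank" href="https://ai.facebook.com/blog/large-language-model-llama-meta-ai/" ...>llama-7b</a>

For the anchors to render as links in the leaderboard rather than as raw HTML, the gr.components.Dataframe column may need to be marked as markdown/HTML, depending on the Gradio version.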