BenchmarkBot committed
Commit 3c37eb3 • Parent: e2e1ee9

move things around

Files changed (2):
  1. app.py +16 -21
  2. src/assets/text_content.py +5 -5
app.py CHANGED
@@ -27,13 +27,13 @@ LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
 OPTIMUM_TOKEN = os.environ.get("OPTIMUM_TOKEN", None)
 
 ALL_COLUMNS_MAPPING = {
-    "weight_class": "Class 🏋️",
-    "model_type": "Type 🤗",
-    #
     "backend.name": "Backend 🏭",
     "backend.torch_dtype": "Dtype 📥",
-    "quantization": "Quantization 🗜️",
     "optimizations": "Optimizations 🛠️",
+    "quantization": "Quantization 🗜️",
+    #
+    "weight_class": "Class 🏋️",
+    "model_type": "Type 🤗",
     #
     "generate.peak_memory(MB)": "Memory (MB) ⬇️",
     "generate.throughput(tokens/s)": "Throughput (tokens/s) ⬆️",
@@ -45,9 +45,9 @@ ALL_COLUMNS_MAPPING = {
 ALL_COLUMNS_DATATYPES = [
     "str",
     "str",
-    #
     "str",
     "str",
+    #
     "str",
     "str",
     #
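
Note that the "#" separator comment in ALL_COLUMNS_DATATYPES is moved in lockstep with the reordered ALL_COLUMNS_MAPPING entries, which suggests the two structures are index-aligned: the i-th datatype describes the i-th renamed column. A minimal sketch of that assumed invariant (toy subset of the columns; the consistency check is hypothetical and not part of the repo):

    # Assumed invariant: ALL_COLUMNS_DATATYPES[i] is the datatype of the
    # i-th column in ALL_COLUMNS_MAPPING (both are edited in lockstep here).
    ALL_COLUMNS_MAPPING = {
        "backend.name": "Backend 🏭",
        "backend.torch_dtype": "Dtype 📥",
        "optimizations": "Optimizations 🛠️",
        "quantization": "Quantization 🗜️",
    }
    ALL_COLUMNS_DATATYPES = ["str", "str", "str", "str"]

    # Hypothetical check, not in the repo: catch the two lists drifting apart.
    assert len(ALL_COLUMNS_MAPPING) == len(ALL_COLUMNS_DATATYPES)
    for (raw, display), dtype in zip(ALL_COLUMNS_MAPPING.items(), ALL_COLUMNS_DATATYPES):
        print(f"{raw} -> {display} ({dtype})")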
 
@@ -85,21 +85,16 @@ def get_benchmark_df(benchmark="Succeeded-1xA100-80GB"):
     merged_df["quantization"] = merged_df["backend.quantization_strategy"].apply(
         lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
     )
-    # distance to 100% score, normalized to 0, 1
-    score_distance = (100 - merged_df["best_score"]) / 100
-    # distance to 0s latency, normalized to 0, 1
-    latency_distance = merged_df["generate.latency(s)"] / (
-        merged_df["generate.latency(s)"].max() - merged_df["generate.latency(s)"].min()
-    )
-    # distance to 0MB memory
-    memory_distance = merged_df["forward.peak_memory(MB)"] / (
-        merged_df["forward.peak_memory(MB)"].max()
-        - merged_df["forward.peak_memory(MB)"].min()
-    )
-    # add perf distance
-    merged_df["perf_distance"] = (
-        score_distance**2 + latency_distance**2 + memory_distance**2
-    ) ** 0.5
+    # # distance to 100% score
+    # score_distance = 100 - merged_df["best_score"]
+    # # distance to 0s latency
+    # latency_distance = merged_df["generate.latency(s)"]
+    # # distance to 0MB memory
+    # memory_distance = merged_df["forward.peak_memory(MB)"]
+    # # add perf distance
+    # merged_df["perf_distance"] = (
+    #     score_distance**2 + latency_distance**2 + memory_distance**2
+    # ) ** 0.5
 
     return merged_df
 
@@ -121,7 +116,7 @@ def get_benchmark_table(bench_df):
     # rename
     copy_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
     # transform
-    copy_df["LLM Type 🤗"] = copy_df["LLM Type 🤗"].apply(process_model_type)
+    copy_df["Type 🤗"] = copy_df["Type 🤗"].apply(process_model_type)
     copy_df["Best Scored LLM 🏆"] = copy_df["Best Scored LLM 🏆"].apply(
         process_model_name
     )
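
For reference, the perf_distance metric commented out in get_benchmark_df above measured each configuration's Euclidean distance to an ideal point of 100% score, 0 s latency and 0 MB peak memory (lower is better). A standalone sketch reproducing the removed computation on made-up numbers; note that the latency and memory terms divide by the column's range without subtracting its minimum, so they are scaled by spread rather than strictly min-max normalized:

    import pandas as pd

    # Toy stand-in for merged_df; values are made up for illustration.
    merged_df = pd.DataFrame({
        "best_score": [45.0, 60.0, 72.5],
        "generate.latency(s)": [12.0, 25.0, 40.0],
        "forward.peak_memory(MB)": [8_000.0, 16_000.0, 30_000.0],
    })

    # distance to 100% score, normalized to [0, 1]
    score_distance = (100 - merged_df["best_score"]) / 100
    # distance to 0s latency, scaled by the observed range
    latency_distance = merged_df["generate.latency(s)"] / (
        merged_df["generate.latency(s)"].max() - merged_df["generate.latency(s)"].min()
    )
    # distance to 0MB memory, scaled by the observed range
    memory_distance = merged_df["forward.peak_memory(MB)"] / (
        merged_df["forward.peak_memory(MB)"].max()
        - merged_df["forward.peak_memory(MB)"].min()
    )
    # Euclidean distance to the ideal (100%, 0s, 0MB) point: lower is better
    merged_df["perf_distance"] = (
        score_distance**2 + latency_distance**2 + memory_distance**2
    ) ** 0.5
    print(merged_df["perf_distance"])

The commit keeps these lines as comments rather than deleting them, presumably to make it easy to re-enable the composite ranking later.
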
src/assets/text_content.py CHANGED
@@ -1,14 +1,14 @@
-TITLE = """<h1 align="center" id="space-title">🤗 Open LLM-Perf Leaderboard 🏋️</h1>"""
+TITLE = """<h1 align="center" id="space-title">🤗 LLM-Perf Leaderboard 🏋️</h1>"""
 
 INTRODUCTION_TEXT = f"""
-The 🤗 Open LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency, throughput & memory) of Large Language Models (LLMs) with different hardwares, backends and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.
+The 🤗 LLM-Perf Leaderboard 🏋️ aims to benchmark the performance (latency, throughput & memory) of Large Language Models (LLMs) with different hardwares, backends and optimizations using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) and [Optimum](https://github.com/huggingface/optimum) flavors.
 
 Anyone from the community can request a model or a hardware/backend/optimization configuration for automated benchmarking:
-- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗 Open LLM-Perf Leaderboard 🏋️ automatically.
+- Model evaluation requests should be made in the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) and will be added to the 🤗 LLM-Perf Leaderboard 🏋️ automatically.
 - Hardware/Backend/Optimization performance requests should be made in the [community discussions](https://huggingface.co/spaces/optimum/llm-perf-leaderboard/discussions) to assess their relevance and feasibility.
 """
 
-ABOUT_TEXT = """<h3>About the 🤗 Open LLM-Perf Leaderboard 🏋️</h3>
+ABOUT_TEXT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
 <ul>
 <li>To avoid communication-dependent results, only one GPU is used.</li>
 <li>LLMs are evaluated on a singleton batch with a prompt size of 512 and generating 1000 tokens.</li>
@@ -63,7 +63,7 @@ benchmark:
 CITATION_BUTTON_LABEL = "Copy the following snippet to cite these results."
 CITATION_BUTTON_TEXT = r"""@misc{open-llm-perf-leaderboard,
   author = {Ilyas Moutawwakil, Régis Pierrard},
-  title = {Open LLM-Perf Leaderboard},
+  title = {LLM-Perf Leaderboard},
   year = {2023},
   publisher = {Hugging Face},
   howpublished = "\url{https://huggingface.co/spaces/optimum/llm-perf-leaderboard}",
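
The About text above pins down the measurement protocol: one GPU, a singleton batch, a 512-token prompt, and 1,000 generated tokens. As a rough illustration only, here is a minimal sketch of such a generate latency/throughput measurement with plain transformers; the model id and the timing loop are assumptions for illustration, not the Optimum-Benchmark implementation (which, among other things, also tracks peak memory):

    import time

    import torch
    from transformers import AutoModelForCausalLM, AutoTokenizer

    # Hypothetical stand-in; use a model whose context window fits
    # 512 prompt + 1000 generated tokens (gpt2's 1024 would not).
    model_id = "facebook/opt-125m"
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(model_id).eval()

    # Singleton batch with a 512-token prompt, as described in ABOUT_TEXT.
    inputs = tokenizer("hello " * 600, return_tensors="pt", truncation=True, max_length=512)

    start = time.perf_counter()
    with torch.no_grad():
        output = model.generate(**inputs, min_new_tokens=1000, max_new_tokens=1000)
    elapsed = time.perf_counter() - start

    new_tokens = output.shape[1] - inputs["input_ids"].shape[1]
    print(f"generate.latency(s): {elapsed:.2f}")
    print(f"generate.throughput(tokens/s): {new_tokens / elapsed:.2f}")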