baptistecolle committed
Commit 8e30a31 • 1 Parent(s): f45c3f0

add t4 to leaderboard (#30)

- add t4 to leaderboard (1b7fb055871b54c99cf75616570506f50c7e9322)
- fix readme (38a9948acfac829033f4aa926a80abb5fab74cc8)

Files changed (4)
  1. .gitignore +2 -1
  2. README.md +59 -1
  3. app.py +1 -0
  4. src/llm_perf.py +8 -3
.gitignore CHANGED
@@ -4,4 +4,5 @@ __pycache__/
 *ipynb
 .vscode/
 
-dataset/
+dataset/
+.venv
README.md CHANGED
@@ -11,4 +11,62 @@ license: apache-2.0
 tags: [llm perf leaderboard, llm performance leaderboard, llm, performance, leaderboard]
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+# LLM-perf leaderboard
+
+## 📝 About
+The 🤗 LLM-Perf Leaderboard 🏋️ is a leaderboard at the intersection of quality and performance.
+Its aim is to benchmark the performance (latency, throughput, memory & energy)
+of Large Language Models (LLMs) on different hardware, backends and optimizations
+using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark).
+
+Anyone from the community can request a new base model or hardware/backend/optimization
+configuration for automated benchmarking:
+
+- Model evaluation requests should be made in the
+[🤗 Open LLM Leaderboard 🏅](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard);
+we scrape the [list of canonical base models](https://github.com/huggingface/optimum-benchmark/blob/main/llm_perf/utils.py) from there.
+- Hardware/Backend/Optimization configuration requests should be made in the
+[🤗 LLM-Perf Leaderboard 🏋️](https://huggingface.co/spaces/optimum/llm-perf-leaderboard) or
+[Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) repository (where the code is hosted).
+
+## ✍️ Details
+
+- To avoid communication-dependent results, only one GPU is used.
+- Score is the average evaluation score obtained from the [🤗 Open LLM Leaderboard](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard).
+- LLMs are run with a batch size of 1 and a prompt size of 256, generating 64 tokens, for at least 10 iterations and 10 seconds.
+- Energy consumption is measured in kWh using CodeCarbon, taking into account the GPU, CPU, RAM and location of the machine.
+- We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two are reported by PyTorch and the last one is observed using PyNVML.
+
+All of our benchmarks are run by a single script,
+[benchmark_cuda_pytorch.py](https://github.com/huggingface/optimum-benchmark/blob/llm-perf/llm-perf/benchmark_cuda_pytorch.py),
+using [Optimum-Benchmark](https://github.com/huggingface/optimum-benchmark) to guarantee reproducibility and consistency.
+
+## 🏃 How to run locally
+
+To run the LLM-Perf Leaderboard locally on your machine, follow these steps:
+
+### 1. Clone the Repository
+
+First, clone the repository to your local machine:
+
+```bash
+git clone https://huggingface.co/spaces/optimum/llm-perf-leaderboard
+cd llm-perf-leaderboard
+```
+
+### 2. Install the Required Dependencies
+
+Install the necessary Python packages listed in the requirements.txt file:
+`pip install -r requirements.txt`
+
+### 3. Run the Application
+
+You can run the Gradio application in one of the following ways:
+- Option 1: Using Python
+`python app.py`
+- Option 2: Using the Gradio CLI (includes hot-reload)
+`gradio app.py`
+
+### 4. Access the Application
+
+Once the application is running, you can access it locally in your web browser at http://127.0.0.1:7860/
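
The Details section above names three memory metrics (Max Allocated, Max Reserved, Max Used) reported by PyTorch and PyNVML. As a rough, hypothetical sketch of how such metrics can be read (the leaderboard's actual measurement code lives in Optimum-Benchmark, not in this repository), assuming a single CUDA device:

```python
import torch
import pynvml

# Hypothetical sketch only: how the three memory metrics described in the README
# could be collected. Not the leaderboard's measurement code.
def read_memory_metrics(device_index: int = 0) -> dict:
    # Max Allocated / Max Reserved are peak values tracked by PyTorch's CUDA caching allocator.
    max_allocated = torch.cuda.max_memory_allocated(device_index)
    max_reserved = torch.cuda.max_memory_reserved(device_index)

    # "Used" memory as seen by the driver via PyNVML (includes the CUDA context and
    # non-PyTorch allocations). In practice this would be polled during the benchmark
    # and the maximum kept; here we read it once for illustration.
    pynvml.nvmlInit()
    handle = pynvml.nvmlDeviceGetHandleByIndex(device_index)
    used = pynvml.nvmlDeviceGetMemoryInfo(handle).used
    pynvml.nvmlShutdown()

    return {
        "max_allocated_MB": max_allocated / 1e6,
        "max_reserved_MB": max_reserved / 1e6,
        "used_MB": used / 1e6,
    }
```
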
app.py CHANGED
@@ -18,6 +18,7 @@ from src.panel import (
 MACHINE_TO_HARDWARE = {
     "1xA10": "A10-24GB-150W 🖥️",
     "1xA100": "A100-80GB-275W 🖥️",
+    "1xT4": "T4-16GB-70W 🖥️",
     # "1xH100": "H100-80GB-700W 🖥️",
 }
 
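
Adding the `"1xT4"` entry is the only change to app.py in this commit. Purely as an illustrative sketch (the actual app.py layout is not shown in this diff, so the Gradio wiring below is an assumption) of how such a mapping could drive one leaderboard tab per machine:

```python
import gradio as gr

from src.llm_perf import get_llm_perf_df

# Assumed mapping, mirroring the diff above; the real one lives in app.py.
MACHINE_TO_HARDWARE = {
    "1xA10": "A10-24GB-150W 🖥️",
    "1xA100": "A100-80GB-275W 🖥️",
    "1xT4": "T4-16GB-70W 🖥️",
}

with gr.Blocks() as demo:
    for machine, hardware_label in MACHINE_TO_HARDWARE.items():
        with gr.Tab(label=hardware_label):
            # Each tab shows the (cached) benchmark table for that machine.
            gr.Dataframe(value=get_llm_perf_df(machine=machine))

if __name__ == "__main__":
    demo.launch()
```
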
src/llm_perf.py CHANGED
@@ -4,6 +4,8 @@ import pandas as pd
 
 from .utils import process_kernels, process_quantizations
 
+DATASET_DIRECTORY = "dataset"
+
 COLUMNS_MAPPING = {
     "config.name": "Experiment 🧪",
     "config.backend.model": "Model 🤗",
@@ -109,11 +111,14 @@ def processed_llm_perf_df(llm_perf_df):
 
 
 def get_llm_perf_df(machine: str = "1xA10"):
-    if os.path.exists(f"llm-perf-leaderboard-{machine}.csv"):
-        llm_perf_df = pd.read_csv(f"llm-perf-leaderboard-{machine}.csv")
+    if not os.path.exists(DATASET_DIRECTORY):
+        os.makedirs(DATASET_DIRECTORY)
+
+    if os.path.exists(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv"):
+        llm_perf_df = pd.read_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv")
     else:
         llm_perf_df = get_raw_llm_perf_df(machine)
         llm_perf_df = processed_llm_perf_df(llm_perf_df)
+        llm_perf_df.to_csv(f"{DATASET_DIRECTORY}/llm-perf-leaderboard-{machine}.csv", index=False)
 
     return llm_perf_df
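
The net effect is that the per-machine leaderboard CSV cache now lives under a dedicated `dataset/` directory (excluded from version control by the .gitignore change above), which is created on first use. A minimal usage sketch of the resulting caching behavior, using the new `1xT4` machine:

```python
from src.llm_perf import get_llm_perf_df

# First call for a machine: raw benchmark data is fetched and processed,
# then written to dataset/llm-perf-leaderboard-1xT4.csv.
df = get_llm_perf_df(machine="1xT4")

# Subsequent calls read the cached CSV from dataset/ instead of recomputing.
df_cached = get_llm_perf_df(machine="1xT4")
print(df_cached.head())
```
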