Spaces:
Running
Running
BenchmarkBot
committed on
Commit
•
d574374
1
Parent(s):
3c37eb3
sort by score
Browse files- app.py +3 -2
- src/assets/text_content.py +4 -3
app.py
CHANGED
@@ -58,7 +58,8 @@ ALL_COLUMNS_DATATYPES = [
|
|
58 |
#
|
59 |
"markdown",
|
60 |
]
|
61 |
-
SORTING_COLUMN = ["
|
|
|
62 |
|
63 |
llm_perf_dataset_repo = load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN)
|
64 |
|
@@ -110,7 +111,7 @@ def get_benchmark_table(bench_df):
|
|
110 |
axis=1,
|
111 |
)
|
112 |
# sort
|
113 |
-
copy_df.sort_values(by=SORTING_COLUMN, ascending=
|
114 |
# filter
|
115 |
copy_df = copy_df[list(ALL_COLUMNS_MAPPING.keys())]
|
116 |
# rename
|
|
|
58 |
#
|
59 |
"markdown",
|
60 |
]
|
61 |
+
SORTING_COLUMN = ["best_score"]
|
62 |
+
SORTING_ASCENDING = [False]
|
63 |
|
64 |
llm_perf_dataset_repo = load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN)
|
65 |
|
|
|
111 |
axis=1,
|
112 |
)
|
113 |
# sort
|
114 |
+
copy_df.sort_values(by=SORTING_COLUMN, ascending=SORTING_ASCENDING, inplace=True)
|
115 |
# filter
|
116 |
copy_df = copy_df[list(ALL_COLUMNS_MAPPING.keys())]
|
117 |
# rename
|
src/assets/text_content.py
CHANGED
@@ -11,10 +11,11 @@ Anyone from the community can request a model or a hardware/backend/optimization
|
|
11 |
ABOUT_TEXT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
|
12 |
<ul>
|
13 |
<li>To avoid communication-dependent results, only one GPU is used.</li>
|
14 |
-
<li>LLMs are evaluated on a singleton batch with a prompt size of 512 and generating 1000 tokens.</li>
|
15 |
-
<li>Peak memory is measured in MB during the generate pass with py3nvml while assuring the GPU's isolation.</li>
|
16 |
-
<li>Each pair of (Model Type, Weight Class) is represented by the best scored model. This LLM is the one used for all the hardware/backend/optimization experiments.</li>
|
17 |
<li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
|
|
|
|
|
|
|
|
|
18 |
</ul>
|
19 |
"""
|
20 |
|
|
|
11 |
ABOUT_TEXT = """<h3>About the 🤗 LLM-Perf Leaderboard 🏋️</h3>
|
12 |
<ul>
|
13 |
<li>To avoid communication-dependent results, only one GPU is used.</li>
|
|
|
|
|
|
|
14 |
<li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">🤗 Open LLM Leaderboard</a>.</li>
|
15 |
+
<li>LLMs are running on a singleton batch with a prompt size of 512 and generating a 1000 tokens.</li>
|
16 |
+
<li>Peak memory is measured in MB during the generate pass using Py3NVML while assuring the GPU's isolation.</li>
|
17 |
+
<li>Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.</li>
|
18 |
+
<li>Each pair of (Model Type, Weight Class) is represented by the best scored model. This LLM is the one used for all the hardware/backend/optimization experiments.</li>
|
19 |
</ul>
|
20 |
"""
|
21 |
|