IlyasMoutawwakil HF staff committed on
Commit
a830adb
β€’
1 Parent(s): f8badc6
app.py CHANGED
@@ -18,6 +18,7 @@ from src.assets.text_content import (
18
  )
19
 
20
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
 
21
  LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
22
  MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB πŸ–₯️"}
23
  ALL_COLUMNS_MAPPING = {
@@ -208,6 +209,8 @@ def filter_query(
208
  # Demo interface
209
  demo = gr.Blocks(css=custom_css)
210
  with demo:
 
 
211
  # leaderboard title
212
  gr.HTML(TITLE)
213
  # introduction text
 
18
  )
19
 
20
  HF_TOKEN = os.environ.get("HF_TOKEN", None)
21
+ LOGO_URL = "https://huggingface.co/spaces/optimum/llm-perf-leaderboard/resolve/main/huggy_bench.png"
22
  LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
23
  MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB πŸ–₯️"}
24
  ALL_COLUMNS_MAPPING = {
 
209
  # Demo interface
210
  demo = gr.Blocks(css=custom_css)
211
  with demo:
212
+ # logo
213
+ gr.HTML(f'<img src="{LOGO_URL}">', elem_classes="logo")
214
  # leaderboard title
215
  gr.HTML(TITLE)
216
  # introduction text
src/assets/css_html_js.py CHANGED
@@ -1,4 +1,11 @@
1
  custom_css = """
 
 
 
 
 
 
 
2
  .descriptive-text {
3
  font-size: 16px !important;
4
  }
 
1
  custom_css = """
2
+ .logo {
3
+ width: 300px;
4
+ height: auto;
5
+ margin: 0 auto;
6
+ max-width: 100%;
7
+ object-fit: contain;
8
+ }
9
  .descriptive-text {
10
  font-size: 16px !important;
11
  }
src/assets/text_content.py CHANGED
@@ -13,9 +13,8 @@ ABOUT_TEXT = """<h3>About the πŸ€— LLM-Perf Leaderboard πŸ‹οΈ</h3>
13
  <li>To avoid communication-dependent results, only one GPU is used.</li>
14
  <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">πŸ€— Open LLM Leaderboard</a>.</li>
15
  <li>LLMs are running on a singleton batch with a prompt size of 256 and generating 1000 tokens.</li>
16
- <li>Peak memory is measured in MB during the generate pass using Py3NVML while assuring the GPU's isolation.</li>
17
  <li>Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.</li>
18
- <li>Each pair of (Model Type, Weight Class) is represented by the best scored model. This LLM is the one used for all the hardware/backend/optimization experiments.</li>
19
  </ul>
20
  """
21
 
 
13
  <li>To avoid communication-dependent results, only one GPU is used.</li>
14
  <li>Score is the average evaluation score obtained from the <a href="https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard">πŸ€— Open LLM Leaderboard</a>.</li>
15
  <li>LLMs are running on a singleton batch with a prompt size of 256 and generating 1000 tokens.</li>
 
16
  <li>Energy consumption is measured in kWh using CodeCarbon and taking into consideration the GPU, CPU, RAM and location of the machine.</li>
17
+ <li>We measure three types of memory: Max Allocated Memory, Max Reserved Memory and Max Used Memory. The first two being reported by PyTorch and the last one being observed using PyNVML.</li>
18
  </ul>
19
  """
20