Spaces:

ludwigstumpp
/

llm-leaderboard

Running

App Files Community

Ludwig Stumpp committed on Aug 23

Commit

cfeff2f

•

2 Parent(s): 5a5e2af 1c71762

Merge branch 'main' into hf-launch

Browse files

Files changed (7) hide show

README.md +1 -0
poetry.lock +0 -0
pyproject.toml +14 -0
requirements-dev.txt +0 -4
requirements.txt +0 -2
setup.cfg +2 -0
streamlit_app.py +35 -13

README.md CHANGED Viewed

@@ -56,6 +56,7 @@ https://huggingface.co/spaces/ludwigstumpp/llm-leaderboard
 | [llama-13b](https://arxiv.org/abs/2302.13971)                                                               | Meta AI             | no    | [932](https://lmsys.org/blog/2023-05-03-arena/)  |                                                                           | [0.792](https://arxiv.org/abs/2302.13971)                          |                                                                 | [0.158](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) |                                               |                                                                 |                                                                                          |                                                                           |                                               |                                                                 | [0.730](https://arxiv.org/abs/2302.13971v1)                        |                                                                 |                                                                 |
 | [llama-33b](https://arxiv.org/abs/2302.13971)                                                               | Meta AI             | no    |                                                  |                                                                           | [0.828](https://arxiv.org/abs/2302.13971)                          |                                                                 | [0.217](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) |                                               |                                                                 |                                                                                          |                                                                           |                                               |                                                                 | [0.760](https://arxiv.org/abs/2302.13971v1)                        |                                                                 |                                                                 |
 | [llama-65b](https://arxiv.org/abs/2302.13971)                                                               | Meta AI             | no    |                                                  |                                                                           | [0.842](https://arxiv.org/abs/2302.13971)                          |                                                                 | [0.237](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) |                                               |                                                                 |                                                                                          | [0.634](https://arxiv.org/abs/2302.13971v1)                               |                                               |                                                                 | [0.770](https://arxiv.org/abs/2302.13971v1)                        |                                                                 |                                                                 |
 | [mpt-7b](https://huggingface.co/mosaicml/mpt-7b)                                                            | MosaicML            | yes   |                                                  |                                                                           | [0.761](https://www.mosaicml.com/blog/mpt-7b)                      |                                                                 |                                                                                 | [0.702](https://www.mosaicml.com/blog/mpt-7b) |                                                                 | [0.296](https://www.mosaicml.com/blog/mpt-7b)                                            |                                                                           | [0.343](https://www.mosaicml.com/blog/mpt-7b) |                                                                 |                                                                    |                                                                 |                                                                 |
 | [oasst-pythia-12b](https://huggingface.co/OpenAssistant/pythia-12b-pre-v8-12.5k-steps)                      | Open Assistant      | yes   | [1065](https://lmsys.org/blog/2023-05-03-arena/) |                                                                           | [0.681](https://gpt4all.io/reports/GPT4All_Technical_Report_3.pdf) |                                                                 |                                                                                 |                                               |                                                                 |                                                                                          |                                                                           |                                               |                                                                 | [0.650](https://gpt4all.io/reports/GPT4All_Technical_Report_3.pdf) |                                                                 |                                                                 |
 | [opt-7b](https://huggingface.co/facebook/opt-6.7b)                                                          | Meta AI             | no    |                                                  |                                                                           | [0.677](https://www.mosaicml.com/blog/mpt-7b)                      |                                                                 |                                                                                 | [0.677](https://www.mosaicml.com/blog/mpt-7b) |                                                                 | [0.251](https://www.mosaicml.com/blog/mpt-7b)                                            |                                                                           | [0.227](https://www.mosaicml.com/blog/mpt-7b) |                                                                 |                                                                    |                                                                 |                                                                 |

 | [llama-13b](https://arxiv.org/abs/2302.13971)                                                               | Meta AI             | no    | [932](https://lmsys.org/blog/2023-05-03-arena/)  |                                                                           | [0.792](https://arxiv.org/abs/2302.13971)                          |                                                                 | [0.158](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) |                                               |                                                                 |                                                                                          |                                                                           |                                               |                                                                 | [0.730](https://arxiv.org/abs/2302.13971v1)                        |                                                                 |                                                                 |
 | [llama-33b](https://arxiv.org/abs/2302.13971)                                                               | Meta AI             | no    |                                                  |                                                                           | [0.828](https://arxiv.org/abs/2302.13971)                          |                                                                 | [0.217](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) |                                               |                                                                 |                                                                                          |                                                                           |                                               |                                                                 | [0.760](https://arxiv.org/abs/2302.13971v1)                        |                                                                 |                                                                 |
 | [llama-65b](https://arxiv.org/abs/2302.13971)                                                               | Meta AI             | no    |                                                  |                                                                           | [0.842](https://arxiv.org/abs/2302.13971)                          |                                                                 | [0.237](https://drive.google.com/file/d/1cN-b9GnWtHzQRoE7M7gAEyivY0kl4BYs/view) |                                               |                                                                 |                                                                                          | [0.634](https://arxiv.org/abs/2302.13971v1)                               |                                               |                                                                 | [0.770](https://arxiv.org/abs/2302.13971v1)                        |                                                                 |                                                                 |
+| [llama-2-70b](https://arxiv.org/abs/2307.09288)                                                             | Meta AI             | yes   |                                                  | [0.873](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) |                                                                    |                                                                 |                                                                                 |                                               |                                                                 |                                                                                          | [0.698](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) |                                               |                                                                 |                                                                    |                                                                 |                                                                 |
 | [mpt-7b](https://huggingface.co/mosaicml/mpt-7b)                                                            | MosaicML            | yes   |                                                  |                                                                           | [0.761](https://www.mosaicml.com/blog/mpt-7b)                      |                                                                 |                                                                                 | [0.702](https://www.mosaicml.com/blog/mpt-7b) |                                                                 | [0.296](https://www.mosaicml.com/blog/mpt-7b)                                            |                                                                           | [0.343](https://www.mosaicml.com/blog/mpt-7b) |                                                                 |                                                                    |                                                                 |                                                                 |
 | [oasst-pythia-12b](https://huggingface.co/OpenAssistant/pythia-12b-pre-v8-12.5k-steps)                      | Open Assistant      | yes   | [1065](https://lmsys.org/blog/2023-05-03-arena/) |                                                                           | [0.681](https://gpt4all.io/reports/GPT4All_Technical_Report_3.pdf) |                                                                 |                                                                                 |                                               |                                                                 |                                                                                          |                                                                           |                                               |                                                                 | [0.650](https://gpt4all.io/reports/GPT4All_Technical_Report_3.pdf) |                                                                 |                                                                 |
 | [opt-7b](https://huggingface.co/facebook/opt-6.7b)                                                          | Meta AI             | no    |                                                  |                                                                           | [0.677](https://www.mosaicml.com/blog/mpt-7b)                      |                                                                 |                                                                                 | [0.677](https://www.mosaicml.com/blog/mpt-7b) |                                                                 | [0.251](https://www.mosaicml.com/blog/mpt-7b)                                            |                                                                           | [0.227](https://www.mosaicml.com/blog/mpt-7b) |                                                                 |                                                                    |                                                                 |                                                                 |

poetry.lock ADDED Viewed

The diff for this file is too large to render. See raw diff

pyproject.toml ADDED Viewed

	@@ -0,0 +1,14 @@

+[tool.poetry]
+package-mode = false
+description = ""
+authors = ["Ludwig Stumpp <42147848+LudwigStumpp@users.noreply.github.com>"]
+readme = "README.md"
+[tool.poetry.dependencies]
+python = "^3.10"
+pandas = "^2.2.2"
+streamlit = "^1.37.1"
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"

requirements-dev.txt DELETED Viewed

@@ -1,4 +0,0 @@
-black
-flake
-isort
-mypy

requirements.txt DELETED Viewed

@@ -1,2 +0,0 @@
-pandas~=2.0.1
-streamlit~=1.22.0

setup.cfg ADDED Viewed

@@ -0,0 +1,2 @@
+[flake8]
+max-line-length = 88

streamlit_app.py CHANGED Viewed

@@ -4,7 +4,8 @@ from collections.abc import Iterable
 import pandas as pd
 import streamlit as st
-from pandas.api.types import is_bool_dtype, is_datetime64_any_dtype, is_numeric_dtype
 GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
 NON_BENCHMARK_COLS = ["Open?", "Publisher"]
@@ -22,11 +23,13 @@ def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataF
     df = (
         pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
         .dropna(axis=1, how="all")  # drop empty columns
-        .iloc[1:]  # drop first row which is the "----" separator of the original markdown table
         .sort_index(ascending=True)
         .apply(lambda x: x.str.strip() if x.dtype == "object" else x)
         .replace("", float("NaN"))
-        .astype(float, errors="ignore")
     )
     # remove whitespace from column names and index
@@ -37,7 +40,9 @@ def extract_table_and_format_from_markdown_text(markdown_table: str) -> pd.DataF
     return df
-def extract_markdown_table_from_multiline(multiline: str, table_headline: str, next_headline_start: str = "#") -> str:
     """Extracts the markdown table from a multiline string.
     Args:
@@ -89,7 +94,9 @@ def remove_markdown_links(text: str) -> str:
     return text
-def filter_dataframe_by_row_and_columns(df: pd.DataFrame, ignore_columns: list[str] | None = None) -> pd.DataFrame:
     """
     Filter dataframe by the rows and columns to display.
@@ -116,7 +123,8 @@ def filter_dataframe_by_row_and_columns(df: pd.DataFrame, ignore_columns: list[s
             df = pd.DataFrame(df.loc[to_filter_index])
         to_filter_columns = st.multiselect(
-            "Filter by benchmark:", sorted([c for c in df.columns if c not in ignore_columns])
         )
         if to_filter_columns:
             df = pd.DataFrame(df[ignore_columns + to_filter_columns])
@@ -173,7 +181,9 @@ def filter_dataframe_by_column_values(df: pd.DataFrame) -> pd.DataFrame:
                     ),
                 )
                 if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
-                    user_date_input_datetime = tuple(map(pd.to_datetime, user_date_input))
                     start_date, end_date = user_date_input_datetime
                     df = df.loc[df[column].between(start_date, end_date)]
@@ -207,22 +217,30 @@ def setup_basic():
 def setup_leaderboard(readme: str):
-    leaderboard_table = extract_markdown_table_from_multiline(readme, table_headline="## Leaderboard")
     leaderboard_table = remove_markdown_links(leaderboard_table)
     df_leaderboard = extract_table_and_format_from_markdown_text(leaderboard_table)
-    df_leaderboard["Open?"] = df_leaderboard["Open?"].map({"yes": 1, "no": 0}).astype(bool)
     st.markdown("## Leaderboard")
     modify = st.checkbox("Add filters")
     clear_empty_entries = st.checkbox("Clear empty entries", value=True)
     if modify:
-        df_leaderboard = filter_dataframe_by_row_and_columns(df_leaderboard, ignore_columns=NON_BENCHMARK_COLS)
         df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
     if clear_empty_entries:
         df_leaderboard = df_leaderboard.dropna(axis=1, how="all")
-        benchmark_columns = [c for c in df_leaderboard.columns if df_leaderboard[c].dtype == float]
         rows_wo_any_benchmark = df_leaderboard[benchmark_columns].isna().all(axis=1)
         df_leaderboard = df_leaderboard[~rows_wo_any_benchmark]
@@ -246,12 +264,16 @@ def setup_leaderboard(readme: str):
 def setup_benchmarks(readme: str):
-    benchmarks_table = extract_markdown_table_from_multiline(readme, table_headline="## Benchmarks")
     df_benchmarks = extract_table_and_format_from_markdown_text(benchmarks_table)
     st.markdown("## Covered Benchmarks")
-    selected_benchmark = st.selectbox("Select a benchmark to learn more:", df_benchmarks.index.unique())
     df_selected = df_benchmarks.loc[selected_benchmark]
     text = [
         f"Name: {selected_benchmark}",

 import pandas as pd
 import streamlit as st
+from pandas.api.types import (is_bool_dtype, is_datetime64_any_dtype,
+                              is_numeric_dtype)
 GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
 NON_BENCHMARK_COLS = ["Open?", "Publisher"]
     df = (
         pd.read_table(io.StringIO(markdown_table), sep="|", header=0, index_col=1)
         .dropna(axis=1, how="all")  # drop empty columns
+        .iloc[
+            1:
+        ]  # drop first row which is the "----" separator of the original markdown table
         .sort_index(ascending=True)
         .apply(lambda x: x.str.strip() if x.dtype == "object" else x)
         .replace("", float("NaN"))
+        .apply(pd.to_numeric, errors="ignore")
     )
     # remove whitespace from column names and index
     return df
+def extract_markdown_table_from_multiline(
+    multiline: str, table_headline: str, next_headline_start: str = "#"
+) -> str:
     """Extracts the markdown table from a multiline string.
     Args:
     return text
+def filter_dataframe_by_row_and_columns(
+    df: pd.DataFrame, ignore_columns: list[str] | None = None
+) -> pd.DataFrame:
     """
     Filter dataframe by the rows and columns to display.
             df = pd.DataFrame(df.loc[to_filter_index])
         to_filter_columns = st.multiselect(
+            "Filter by benchmark:",
+            sorted([c for c in df.columns if c not in ignore_columns]),
         )
         if to_filter_columns:
             df = pd.DataFrame(df[ignore_columns + to_filter_columns])
                     ),
                 )
                 if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
+                    user_date_input_datetime = tuple(
+                        map(pd.to_datetime, user_date_input)
+                    )
                     start_date, end_date = user_date_input_datetime
                     df = df.loc[df[column].between(start_date, end_date)]
 def setup_leaderboard(readme: str):
+    leaderboard_table = extract_markdown_table_from_multiline(
+        readme, table_headline="## Leaderboard"
+    )
     leaderboard_table = remove_markdown_links(leaderboard_table)
     df_leaderboard = extract_table_and_format_from_markdown_text(leaderboard_table)
+    df_leaderboard["Open?"] = (
+        df_leaderboard["Open?"].map({"yes": 1, "no": 0}).astype(bool)
+    )
     st.markdown("## Leaderboard")
     modify = st.checkbox("Add filters")
     clear_empty_entries = st.checkbox("Clear empty entries", value=True)
     if modify:
+        df_leaderboard = filter_dataframe_by_row_and_columns(
+            df_leaderboard, ignore_columns=NON_BENCHMARK_COLS
+        )
         df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
     if clear_empty_entries:
         df_leaderboard = df_leaderboard.dropna(axis=1, how="all")
+        benchmark_columns = [
+            c for c in df_leaderboard.columns if df_leaderboard[c].dtype == float
+        ]
         rows_wo_any_benchmark = df_leaderboard[benchmark_columns].isna().all(axis=1)
         df_leaderboard = df_leaderboard[~rows_wo_any_benchmark]
 def setup_benchmarks(readme: str):
+    benchmarks_table = extract_markdown_table_from_multiline(
+        readme, table_headline="## Benchmarks"
+    )
     df_benchmarks = extract_table_and_format_from_markdown_text(benchmarks_table)
     st.markdown("## Covered Benchmarks")
+    selected_benchmark = st.selectbox(
+        "Select a benchmark to learn more:", df_benchmarks.index.unique()
+    )
     df_selected = df_benchmarks.loc[selected_benchmark]
     text = [
         f"Name: {selected_benchmark}",