Spaces:
Running
Running
Ludwig Stumpp
committed on
Commit
·
8658420
1
Parent(s):
9d7638e
Add filtering by values
Browse files- streamlit_app.py +74 -8
streamlit_app.py
CHANGED
|
@@ -1,8 +1,10 @@
|
|
| 1 |
import io
|
| 2 |
import re
|
|
|
|
| 3 |
|
| 4 |
import pandas as pd
|
| 5 |
import streamlit as st
|
|
|
|
| 6 |
|
| 7 |
GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
| 8 |
|
|
@@ -86,10 +88,11 @@ def remove_markdown_links(text: str) -> str:
|
|
| 86 |
return text
|
| 87 |
|
| 88 |
|
| 89 |
-
def
|
| 90 |
"""
|
| 91 |
-
|
| 92 |
|
|
|
|
| 93 |
Modified from https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/
|
| 94 |
|
| 95 |
Args:
|
|
@@ -99,11 +102,6 @@ def filter_dataframe(df: pd.DataFrame, ignore_columns: list[str] | None = None)
|
|
| 99 |
Returns:
|
| 100 |
pd.DataFrame: Filtered dataframe
|
| 101 |
"""
|
| 102 |
-
modify = st.checkbox("Add filters")
|
| 103 |
-
|
| 104 |
-
if not modify:
|
| 105 |
-
return df
|
| 106 |
-
|
| 107 |
df = df.copy()
|
| 108 |
|
| 109 |
if ignore_columns is None:
|
|
@@ -123,6 +121,67 @@ def filter_dataframe(df: pd.DataFrame, ignore_columns: list[str] | None = None)
|
|
| 123 |
return df
|
| 124 |
|
| 125 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 126 |
def setup_basic():
|
| 127 |
title = "π LLM-Leaderboard"
|
| 128 |
|
|
@@ -146,7 +205,14 @@ def setup_leaderboard(readme: str):
|
|
| 146 |
df_leaderboard["Commercial Use?"] = df_leaderboard["Commercial Use?"].map({"yes": 1, "no": 0}).astype(bool)
|
| 147 |
|
| 148 |
st.markdown("## Leaderboard")
|
| 149 |
-
st.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
|
| 151 |
|
| 152 |
def setup_benchmarks(readme: str):
|
|
|
|
| 1 |
import io
|
| 2 |
import re
|
| 3 |
+
from collections.abc import Iterable
|
| 4 |
|
| 5 |
import pandas as pd
|
| 6 |
import streamlit as st
|
| 7 |
+
from pandas.api.types import is_datetime64_any_dtype, is_numeric_dtype
|
| 8 |
|
| 9 |
GITHUB_URL = "https://github.com/LudwigStumpp/llm-leaderboard"
|
| 10 |
|
|
|
|
| 88 |
return text
|
| 89 |
|
| 90 |
|
| 91 |
+
def filter_dataframe_by_row_and_columns(df: pd.DataFrame, ignore_columns: list[str] | None = None) -> pd.DataFrame:
|
| 92 |
"""
|
| 93 |
+
Filter dataframe by the rows and columns to display.
|
| 94 |
|
| 95 |
+
This does not select based on the values in the dataframe, but rather on the index and columns.
|
| 96 |
Modified from https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/
|
| 97 |
|
| 98 |
Args:
|
|
|
|
| 102 |
Returns:
|
| 103 |
pd.DataFrame: Filtered dataframe
|
| 104 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
df = df.copy()
|
| 106 |
|
| 107 |
if ignore_columns is None:
|
|
|
|
| 121 |
return df
|
| 122 |
|
| 123 |
|
| 124 |
+
def filter_dataframe_by_column_values(df: pd.DataFrame) -> pd.DataFrame:
    """
    Filter dataframe by the values in the dataframe.

    Renders a multiselect for choosing the columns to filter on and, for each
    chosen column, a widget matching its dtype:

    - numeric columns (booleans excluded): a range slider,
    - datetime columns: a date-range picker,
    - everything else (including boolean columns): a multiselect of the
      column's unique values.

    Filters are applied one after another in the order the columns were
    chosen, so each widget is built from the rows that survived the previous
    filters.

    Modified from https://blog.streamlit.io/auto-generate-a-dataframe-filtering-ui-in-streamlit-with-filter_dataframe/

    Args:
        df (pd.DataFrame): Original dataframe

    Returns:
        pd.DataFrame: Filtered dataframe
    """
    # Work on a copy so the caller's dataframe is never modified.
    df = df.copy()

    modification_container = st.container()

    with modification_container:
        to_filter_columns = st.multiselect("Filter results on:", df.columns)
        # Only the wide right-hand column receives widgets; the first column
        # returned by st.columns is not used.
        _, right = st.columns((1, 20))

        for column in to_filter_columns:
            # pandas reports bool columns as numeric; a range slider makes no
            # sense for True/False, so they are sent to the multiselect branch.
            is_bool_column = pd.api.types.is_bool_dtype(df[column])

            if is_numeric_dtype(df[column]) and not is_bool_column:
                _min = float(df[column].min())
                _max = float(df[column].max())

                # A slider needs a non-empty, well-defined range; skip the
                # filter when all values are equal or the bounds are NaN.
                if (_min != _max) and pd.notna(_min) and pd.notna(_max):
                    step = 0.01
                    user_num_input = right.slider(
                        f"Values for {column}:",
                        # Widen the bounds by one step so the rounded limits
                        # still enclose the true min/max.
                        min_value=round(_min - step, 2),
                        max_value=round(_max + step, 2),
                        value=(_min, _max),
                        step=step,
                    )
                    df = df[df[column].between(*user_num_input)]

            elif is_datetime64_any_dtype(df[column]):
                first_date = df[column].min()
                last_date = df[column].max()

                # Mirror the numeric guard: without valid bounds (empty or
                # all-NaT column) there is no default range to offer.
                if pd.notna(first_date) and pd.notna(last_date):
                    user_date_input = right.date_input(
                        f"Values for {column}:",
                        value=(
                            first_date,
                            last_date,
                        ),
                    )
                    # Only filter when the widget returned two values, i.e. a
                    # complete (start, end) pair.
                    if isinstance(user_date_input, Iterable) and len(user_date_input) == 2:
                        user_date_input_datetime = tuple(map(pd.to_datetime, user_date_input))
                        start_date, end_date = user_date_input_datetime
                        df = df.loc[df[column].between(start_date, end_date)]

            else:
                selected_values = right.multiselect(
                    f"Values for {column}:",
                    df[column].unique(),
                )

                # An empty selection means "no filter", not "hide everything".
                if selected_values:
                    df = df[df[column].isin(selected_values)]

    return df
|
| 183 |
+
|
| 184 |
+
|
| 185 |
def setup_basic():
|
| 186 |
title = "π LLM-Leaderboard"
|
| 187 |
|
|
|
|
| 205 |
df_leaderboard["Commercial Use?"] = df_leaderboard["Commercial Use?"].map({"yes": 1, "no": 0}).astype(bool)
|
| 206 |
|
| 207 |
st.markdown("## Leaderboard")
|
| 208 |
+
modify = st.checkbox("Add filters")
|
| 209 |
+
if modify:
|
| 210 |
+
df_leaderboard = filter_dataframe_by_row_and_columns(
|
| 211 |
+
df_leaderboard, ignore_columns=["Commercial Use?", "Publisher"]
|
| 212 |
+
)
|
| 213 |
+
df_leaderboard = filter_dataframe_by_column_values(df_leaderboard)
|
| 214 |
+
|
| 215 |
+
st.dataframe(df_leaderboard)
|
| 216 |
|
| 217 |
|
| 218 |
def setup_benchmarks(readme: str):
|