first commit
Browse files
app.py
ADDED
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
from html import escape
|
5 |
+
|
6 |
+
|
7 |
+
# Page setup: use the full browser width for the data table.
st.set_page_config(layout="wide")

# Rendering rules for the main dataframe: count columns get an emoji
# suffix; URL columns render as clickable "Open" links.
_count_formats = {
    "Downloads": "%d 📥",
    "Likes": "%d ❤️",
}
_link_columns = ("Hugging Face URL", "Arxiv URL", "PapersWithCode URL")

column_config = {
    name: st.column_config.NumberColumn(name, format=fmt)
    for name, fmt in _count_formats.items()
}
column_config.update(
    {name: st.column_config.LinkColumn(name, display_text="Open") for name in _link_columns}
)
|
20 |
+
|
21 |
+
|
22 |
+
@st.cache_data
def load_data(file_path: str = 'HuggingFaceBenchmarkDatasetsWithTags - Copy of HuggingFaceBenchmarkDatasetsWithTags (1).csv') -> pd.DataFrame:
    """Load and clean the benchmark-dataset CSV.

    The path is now a parameter (defaulting to the original hard-coded file)
    so the loader can be reused against other exports of the same schema.

    Parameters
    ----------
    file_path : str
        Path to the CSV export; 'NA' and empty cells are read as missing.

    Returns
    -------
    pd.DataFrame
        Cleaned frame: parsed timestamps, numeric filter columns coerced to
        numbers, empty strings normalised to NaN, large free-text columns
        dropped.
    """
    data = pd.read_csv(file_path, na_values=['NA', ''])

    # Timestamps: unparseable values become NaT instead of raising.
    data['Created At'] = pd.to_datetime(data['Created At'], errors='coerce')
    data['Last Modified'] = pd.to_datetime(data['Last Modified'], errors='coerce')

    # Columns driving the sidebar sliders must be numeric; bad cells -> NaN.
    numeric_cols = ['Downloads', 'Likes', 'Total Examples', 'Dataset Size (bytes)']
    for col in numeric_cols:
        data[col] = pd.to_numeric(data[col], errors='coerce')

    # Normalise any remaining empty strings to NaN.
    data.replace("", np.nan, inplace=True)

    # These free-text columns are unused by the UI; drop them.
    # errors='ignore' keeps loading robust if an export omits them.
    data = data.drop(columns=['Card Data', 'Model Card README'], errors='ignore')
    return data
|
38 |
+
|
39 |
+
def escape_html(val):
    """HTML-escape string values; return any non-string value untouched."""
    if not isinstance(val, str):
        return val
    return escape(val)
|
41 |
+
|
42 |
+
df = load_data()

# Page header.
st.title('Bench1k: LLM Benchmarks & Evals Database')
st.subheader('Explore 1,327+ benchmarks. By default, sorted by # of downloads.')
st.write("Use the sidebar to apply filters.")

# Free-text search across all string columns (applied further below).
search_query = st.text_input("Search benchmarks by keyword")


def _choices(column):
    # Distinct non-null values of a column, as a plain list for multiselect.
    return df[column].dropna().unique().tolist()


def _bounds(column, as_int=False):
    # (min, max) of a numeric column ignoring NaN; optionally cast to int.
    lo = df[column].min(skipna=True)
    hi = df[column].max(skipna=True)
    return (int(lo), int(hi)) if as_int else (lo, hi)


# Categorical sidebar filters.
selected_task_id = st.sidebar.multiselect('Filter by Task IDs', _choices('Task IDs'))
selected_task_category = st.sidebar.multiselect('Filter by Task Categories', _choices('Task Categories'))
selected_license = st.sidebar.multiselect('Filter by License', _choices('Licenses'))

# Range sliders, initialised to the full span of each column.
min_likes, max_likes = _bounds('Likes', as_int=True)
selected_likes = st.sidebar.slider('Filter by Likes', min_likes, max_likes, (min_likes, max_likes))

min_size, max_size = _bounds('Dataset Size (bytes)')
selected_size = st.sidebar.slider('Filter by Dataset Size (bytes)', min_size, max_size, (min_size, max_size))

min_examples, max_examples = _bounds('Total Examples')
selected_examples = st.sidebar.slider('Filter by Total Examples', min_examples, max_examples, (min_examples, max_examples))
|
68 |
+
|
69 |
+
# Start from a copy so later in-place column edits can never mutate the
# dataframe cached by st.cache_data (without filters, `filtered_df = df`
# would alias the cached object and the sanitising writes below would
# corrupt the cache entry).
filtered_df = df.copy()

# Keyword search: case-insensitive match in any string column.
if search_query:
    search_cols = df.select_dtypes(include=[object]).columns
    filtered_df = filtered_df[filtered_df[search_cols].apply(lambda x: x.str.contains(search_query, case=False, na=False)).any(axis=1)]

# 'Task IDs' / 'Task Categories' cells hold comma-separated values, so test
# membership per comma-split item rather than by substring.
if selected_task_id:
    mask_task_id = filtered_df['Task IDs'].apply(lambda x: any(task_id.strip() in str(x).split(',') for task_id in selected_task_id))
    filtered_df = filtered_df[mask_task_id]

if selected_task_category:
    mask_task_category = filtered_df['Task Categories'].apply(lambda x: any(category.strip() in str(x).split(',') for category in selected_task_category))
    filtered_df = filtered_df[mask_task_category]

if selected_license:
    filtered_df = filtered_df[filtered_df['Licenses'].isin(selected_license)]

# NOTE: slider values are (lo, hi) tuples and therefore always truthy; the
# guards only protect against a missing widget value.
if selected_likes:
    filtered_df = filtered_df[filtered_df['Likes'].between(selected_likes[0], selected_likes[1])]

if selected_size:
    filtered_df = filtered_df[filtered_df['Dataset Size (bytes)'].between(selected_size[0], selected_size[1])]

if selected_examples:
    filtered_df = filtered_df[filtered_df['Total Examples'].between(selected_examples[0], selected_examples[1])]
|
94 |
+
|
95 |
+
def clean_html_sensitive_content(val):
    """Strip a string down to alphanumerics, spaces, hyphens and underscores.

    Non-string values are returned unchanged. Used to sanitise free-text
    tag columns before rendering them in the table.
    """
    if not isinstance(val, str):
        return val
    allowed_extras = {' ', '-', '_'}
    kept = [ch for ch in val if ch.isalnum() or ch in allowed_extras]
    return ''.join(kept)
|
99 |
+
|
100 |
+
# Sanitise the free-text tag columns before display. Work on an explicit
# copy: when no sidebar filter is active, filtered_df can alias the frame
# cached by st.cache_data, and writing columns through that alias would
# silently corrupt the cached data (and raise SettingWithCopyWarning when
# filtered_df is a slice).
filtered_df = filtered_df.copy()
filtered_df['Task IDs'] = filtered_df['Task IDs'].apply(clean_html_sensitive_content)
filtered_df['Task Categories'] = filtered_df['Task Categories'].apply(clean_html_sensitive_content)

# Main table: column_config adds emoji number formats and "Open" links.
st.dataframe(filtered_df, column_config=column_config, hide_index=True)

st.sidebar.info("Use the filters above to explore different aspects of the benchmark datasets.")
|
106 |
+
|
107 |
+
|
108 |
+
@st.cache_data
def convert_df(df):
    """Serialise *df* to UTF-8 CSV bytes for the download button."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
|
111 |
+
|
112 |
+
# Offer the complete (unfiltered) database as a CSV download.
csv = convert_df(df)

_download_kwargs = dict(
    label="Download database as CSV",
    data=csv,
    file_name='bench1k_database_full.csv',
    mime='text/csv',
)
st.download_button(**_download_kwargs)
|