gaodrew committed on
Commit
7d98704
1 Parent(s): d722e9d

first commit

Browse files
Files changed (1) hide show
  1. app.py +119 -0
app.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from html import escape
5
+
6
+
# Configure the page to use Streamlit's "wide" layout.
st.set_page_config(layout="wide")

# Display settings handed to st.dataframe: the two counters are rendered with
# an emoji suffix and the three URL columns as links labelled "Open".
_link_column_names = ("Hugging Face URL", "Arxiv URL", "PapersWithCode URL")

column_config = {
    "Downloads": st.column_config.NumberColumn("Downloads", format="%d 📥"),
    "Likes": st.column_config.NumberColumn("Likes", format="%d ❤️"),
}
column_config.update(
    (name, st.column_config.LinkColumn(name, display_text="Open"))
    for name in _link_column_names
)
20
+
21
+
@st.cache_data
def load_data():
    """Read the benchmark CSV and return it as a cleaned DataFrame.

    The two timestamp columns are parsed as datetimes and the count/size
    columns as numbers; values that cannot be parsed are coerced to
    NaT/NaN instead of raising. Empty strings are replaced with NaN, and
    the 'Card Data' and 'Model Card README' columns are dropped.
    """
    csv_path = 'HuggingFaceBenchmarkDatasetsWithTags - Copy of HuggingFaceBenchmarkDatasetsWithTags (1).csv'
    frame = pd.read_csv(csv_path, na_values=['NA', ''])

    for date_col in ('Created At', 'Last Modified'):
        frame[date_col] = pd.to_datetime(frame[date_col], errors='coerce')

    for number_col in ('Downloads', 'Likes', 'Total Examples', 'Dataset Size (bytes)'):
        frame[number_col] = pd.to_numeric(frame[number_col], errors='coerce')

    # Treat any remaining empty strings as missing values.
    frame = frame.replace("", np.nan)

    return frame.drop(columns=['Card Data', 'Model Card README'])
38
+
def escape_html(val):
    """Return *val* HTML-escaped when it is a string, otherwise unchanged."""
    if not isinstance(val, str):
        return val
    return escape(val)
41
+
# Load the benchmark table (load_data is cached) that the widgets below read.
df = load_data()

st.title('Bench1k: LLM Benchmarks & Evals Database')
st.subheader('Explore 1,327+ benchmarks. By default, sorted by # of downloads.')
st.write("Use the sidebar to apply filters.")

search_query = st.text_input("Search benchmarks by keyword")


def _sidebar_choices(label, column):
    """Show a sidebar multiselect offering the distinct non-null values of *column*."""
    options = df[column].dropna().unique().tolist()
    return st.sidebar.multiselect(label, options)


def _sidebar_range(label, low, high):
    """Show a sidebar range slider from *low* to *high* with the whole span preselected."""
    return st.sidebar.slider(label, low, high, (low, high))


selected_task_id = _sidebar_choices('Filter by Task IDs', 'Task IDs')
selected_task_category = _sidebar_choices('Filter by Task Categories', 'Task Categories')
selected_license = _sidebar_choices('Filter by License', 'Licenses')

# Likes bounds are converted to plain ints; the other two bounds are used as
# returned by pandas.
likes_column = df['Likes']
min_likes, max_likes = int(likes_column.min(skipna=True)), int(likes_column.max(skipna=True))
selected_likes = _sidebar_range('Filter by Likes', min_likes, max_likes)

size_column = df['Dataset Size (bytes)']
min_size, max_size = size_column.min(skipna=True), size_column.max(skipna=True)
selected_size = _sidebar_range('Filter by Dataset Size (bytes)', min_size, max_size)

examples_column = df['Total Examples']
min_examples, max_examples = examples_column.min(skipna=True), examples_column.max(skipna=True)
selected_examples = _sidebar_range('Filter by Total Examples', min_examples, max_examples)
68
+
def _contains_keyword(frame, keyword):
    """Return a boolean row mask: True where any object-dtype column contains *keyword*.

    The keyword is matched case-insensitively and literally (regex=False), so
    user input such as "c++" or "(" is not interpreted as a regular
    expression. Missing cells never match.
    """
    text_cols = frame.select_dtypes(include=[object]).columns
    hits = frame[text_cols].apply(
        lambda col: col.str.contains(keyword, case=False, na=False, regex=False)
    )
    return hits.any(axis=1)


def _split_tags(cell):
    """Split a comma-separated cell into a set of stripped, non-empty tags."""
    # NOTE(review): assumes tag cells are comma-separated text — confirm
    # against the CSV's actual format.
    return {tag.strip() for tag in str(cell).split(',')} - {''}


def _matches_any_tag(column, selections):
    """Return a boolean mask: True where a cell shares a tag with any selection.

    Both the cell and each selected option are split on commas and stripped,
    so surrounding whitespace does not prevent a match and a multi-tag option
    matches rows carrying any of its tags. Missing cells never match.
    """
    wanted = set()
    for choice in selections:
        wanted |= _split_tags(choice)
    matches = column.apply(
        lambda cell: pd.notna(cell) and not wanted.isdisjoint(_split_tags(cell))
    )
    # astype(bool) keeps the mask usable even when the column is empty.
    return matches.astype(bool)


def _within_range(frame, column, selected):
    """Return the rows of *frame* whose *column* value lies in the selected range.

    Series.between is False for NaN, which would hide every row with a missing
    value even when the slider is untouched. Rows with missing values are
    therefore kept as long as the selected range still covers the full span of
    the column in df; once the range is narrowed they are excluded.
    """
    low, high = selected
    values = frame[column]
    keep = values.between(low, high)
    if low <= df[column].min() and high >= df[column].max():
        keep |= values.isna()
    return frame[keep]


# Start from the full table and narrow it with each active filter in turn.
filtered_df = df

if search_query:
    filtered_df = filtered_df[_contains_keyword(filtered_df, search_query)]

if selected_task_id:
    filtered_df = filtered_df[_matches_any_tag(filtered_df['Task IDs'], selected_task_id)]

if selected_task_category:
    filtered_df = filtered_df[
        _matches_any_tag(filtered_df['Task Categories'], selected_task_category)
    ]

if selected_license:
    filtered_df = filtered_df[filtered_df['Licenses'].isin(selected_license)]

# The range sliders always return a (low, high) tuple, so these are applied
# unconditionally.
for range_column, selected_range in (
    ('Likes', selected_likes),
    ('Dataset Size (bytes)', selected_size),
    ('Total Examples', selected_examples),
):
    filtered_df = _within_range(filtered_df, range_column, selected_range)
94
+
def clean_html_sensitive_content(val):
    """Drop every character of a string that is not alphanumeric, ' ', '-' or '_'.

    Alphanumeric means whatever str.isalnum accepts. Non-string values are
    returned unchanged.
    """
    if not isinstance(val, str):
        return val
    kept_symbols = [' ', '-', '_']
    return ''.join(filter(lambda ch: ch.isalnum() or ch in kept_symbols, val))
99
+
# filtered_df is the result of boolean-mask indexing; assigning columns on such
# a result can raise pandas' SettingWithCopyWarning. Taking an explicit copy
# makes the assignment unambiguous and guarantees df itself is never modified.
filtered_df = filtered_df.copy()
for tag_column in ('Task IDs', 'Task Categories'):
    filtered_df[tag_column] = filtered_df[tag_column].apply(clean_html_sensitive_content)

st.dataframe(filtered_df, column_config=column_config, hide_index=True)

st.sidebar.info("Use the filters above to explore different aspects of the benchmark datasets.")
106
+
107
+
@st.cache_data
def convert_df(df):
    """Serialize *df* to CSV text and return it encoded as UTF-8 bytes."""
    csv_text = df.to_csv()
    return csv_text.encode('utf-8')
111
+
# The export is built from df rather than filtered_df, so the download is not
# affected by the search box or sidebar filters.
csv = convert_df(df)

download_options = {
    'label': "Download database as CSV",
    'data': csv,
    'file_name': 'bench1k_database_full.csv',
    'mime': 'text/csv',
}
st.download_button(**download_options)