import streamlit as st
import pandas as pd
import numpy as np
from html import escape

st.set_page_config(layout="wide")

# Column renderers for the main dataframe: emoji-decorated counts and
# click-through "Open" links instead of raw URLs.
column_config = {
    "Downloads": st.column_config.NumberColumn("Downloads", format="%d 📥"),
    "Likes": st.column_config.NumberColumn("Likes", format="%d ❤️"),
    "Hugging Face URL": st.column_config.LinkColumn("Hugging Face URL", display_text="Open"),
    "Arxiv URL": st.column_config.LinkColumn("Arxiv URL", display_text="Open"),
    "PapersWithCode URL": st.column_config.LinkColumn("PapersWithCode URL", display_text="Open"),
}


@st.cache_data
def load_data():
    """Load and normalize the benchmark-datasets CSV.

    Parses date and numeric columns (coercing bad values to NaT/NaN) and
    drops the two free-text columns that the app never displays.
    """
    file_path = 'HuggingFaceBenchmarkDatasetsWithTags - Copy of HuggingFaceBenchmarkDatasetsWithTags (1).csv'
    data = pd.read_csv(file_path, na_values=['NA', ''])
    data['Created At'] = pd.to_datetime(data['Created At'], errors='coerce')
    data['Last Modified'] = pd.to_datetime(data['Last Modified'], errors='coerce')
    numeric_cols = ['Downloads', 'Likes', 'Total Examples', 'Dataset Size (bytes)']
    for col in numeric_cols:
        data[col] = pd.to_numeric(data[col], errors='coerce')
    data.replace("", np.nan, inplace=True)
    data = data.drop(columns=['Card Data', 'Model Card README'])
    return data


def escape_html(val):
    """HTML-escape string values; pass non-strings through unchanged."""
    return escape(val) if isinstance(val, str) else val


df = load_data()

st.title('Bench1k: LLM Benchmarks & Evals Database')
# BUG FIX: this single-quoted string literal was split across a raw newline in
# the source, which is a SyntaxError in Python. Rejoined into one literal.
st.subheader('Explore 1,327+ benchmarks. By default, sorted by # of downloads.')
st.write("Use the sidebar to apply filters.")

search_query = st.text_input("Search benchmarks by keyword")

# ---- Sidebar filter widgets -------------------------------------------------
task_ids = df['Task IDs'].dropna().unique().tolist()
selected_task_id = st.sidebar.multiselect('Filter by Task IDs', task_ids)
task_categories = df['Task Categories'].dropna().unique().tolist()
selected_task_category = st.sidebar.multiselect('Filter by Task Categories', task_categories)
licenses = df['Licenses'].dropna().unique().tolist()
selected_license = st.sidebar.multiselect('Filter by License', licenses)

# NOTE(review): int(...) raises if 'Likes' is entirely NaN — assumes the CSV
# always contains at least one numeric value per filtered column; verify.
min_likes, max_likes = int(df['Likes'].min(skipna=True)), int(df['Likes'].max(skipna=True))
selected_likes = st.sidebar.slider('Filter by Likes', min_likes, max_likes, (min_likes, max_likes))
min_size, max_size = df['Dataset Size (bytes)'].min(skipna=True), df['Dataset Size (bytes)'].max(skipna=True)
selected_size = st.sidebar.slider('Filter by Dataset Size (bytes)', min_size, max_size, (min_size, max_size))
min_examples, max_examples = df['Total Examples'].min(skipna=True), df['Total Examples'].max(skipna=True)
selected_examples = st.sidebar.slider('Filter by Total Examples', min_examples, max_examples, (min_examples, max_examples))

# ---- Apply filters ----------------------------------------------------------
filtered_df = df
if search_query:
    # Case-insensitive substring match across every object (string) column;
    # a row matches if any of its string cells contains the query.
    search_cols = df.select_dtypes(include=[object]).columns
    filtered_df = filtered_df[
        filtered_df[search_cols]
        .apply(lambda x: x.str.contains(search_query, case=False, na=False))
        .any(axis=1)
    ]
if selected_task_id:
    # A row matches when any selected ID appears in its comma-separated list.
    mask_task_id = filtered_df['Task IDs'].apply(
        lambda x: any(task_id.strip() in str(x).split(',') for task_id in selected_task_id)
    )
    filtered_df = filtered_df[mask_task_id]
if selected_task_category:
    mask_task_category = filtered_df['Task Categories'].apply(
        lambda x: any(category.strip() in str(x).split(',') for category in selected_task_category)
    )
    filtered_df = filtered_df[mask_task_category]
if selected_license:
    filtered_df = filtered_df[filtered_df['Licenses'].isin(selected_license)]
if selected_likes:
    filtered_df = filtered_df[filtered_df['Likes'].between(selected_likes[0], selected_likes[1])]
if selected_size:
    filtered_df = filtered_df[filtered_df['Dataset Size (bytes)'].between(selected_size[0], selected_size[1])]
if selected_examples:
    filtered_df = filtered_df[filtered_df['Total Examples'].between(selected_examples[0], selected_examples[1])]

# BUG FIX: filtered_df is a boolean-indexed slice of df at this point;
# assigning columns to a slice triggers pandas' SettingWithCopyWarning and
# may silently fail to write. Take an explicit copy before mutating.
filtered_df = filtered_df.copy()


def clean_html_sensitive_content(val):
    """Strip every character except alphanumerics, space, '-' and '_'.

    Defensive sanitization for the tag columns before rendering.
    Non-string values are returned unchanged.
    """
    if isinstance(val, str):
        val = ''.join(e for e in val if e.isalnum() or e in [' ', '-', '_'])
    return val


filtered_df['Task IDs'] = filtered_df['Task IDs'].apply(clean_html_sensitive_content)
filtered_df['Task Categories'] = filtered_df['Task Categories'].apply(clean_html_sensitive_content)

st.dataframe(filtered_df, column_config=column_config, hide_index=True)
st.sidebar.info("Use the filters above to explore different aspects of the benchmark datasets.")


@st.cache_data
def convert_df(df):
    """Serialize a dataframe to UTF-8 CSV bytes for the download button."""
    return df.to_csv().encode('utf-8')


# Full (unfiltered) database is offered for download, not the filtered view.
csv = convert_df(df)
st.download_button(
    label="Download database as CSV",
    data=csv,
    file_name='bench1k_database_full.csv',
    mime='text/csv',
)