# Source header (copied from the Hugging Face file viewer):
#   alibayram — commit 4ecae57 — "Refactor Gradio app to enhance leaderboard
#   functionality, improve model response search, and streamline model
#   submission process" (raw / history / blame, 3.96 kB)
import os

import gradio as gr
import pandas as pd
from apscheduler.schedulers.background import BackgroundScheduler
from gradio_leaderboard import Leaderboard, ColumnFilter, SelectColumns
from huggingface_hub import snapshot_download
# --- Dataset paths and Hub constants ---------------------------------------
# Parquet files are read straight from the Hugging Face Hub via the hf://
# filesystem protocol (handled by pandas + huggingface_hub/fsspec).
LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
REPO_ID = "alibayram"
QUEUE_REPO = "queue-repo"
RESULTS_REPO = "results-repo"
# Read the access token from the environment instead of hard-coding a secret
# in source. Falls back to the original placeholder so behavior is unchanged
# when HF_TOKEN is not set.
TOKEN = os.environ.get("HF_TOKEN", "your_hf_token")
# --- Load the three Hub datasets at import time ----------------------------
# Any failure is reported and re-raised so the Space fails fast instead of
# starting with missing data.
try:
    leaderboard_data = pd.read_parquet(LEADERBOARD_PATH)
    model_responses_data = pd.read_parquet(RESPONSES_PATH)
    section_results_data = pd.read_parquet(SECTION_RESULTS_PATH)
except Exception as exc:
    print(f"Error loading datasets: {exc}")
    raise
# --- Leaderboard construction ----------------------------------------------
def init_leaderboard(dataframe):
    """Build the Leaderboard component from the leaderboard DataFrame.

    Args:
        dataframe: pandas DataFrame with one row per evaluated model; expected
            to contain at least the columns "model", "basari", "toplam_sure",
            "family" and "quantization_level".

    Returns:
        A configured gradio_leaderboard.Leaderboard component.

    Raises:
        ValueError: If *dataframe* is None or empty.
    """
    if dataframe is None or dataframe.empty:
        raise ValueError("Leaderboard DataFrame is empty or None.")
    return Leaderboard(
        value=dataframe,
        # DataFrame.dtypes already yields dtype objects; the original
        # `col.dtype.name` raised AttributeError because a dtype object has
        # no `.dtype` attribute.
        datatype=[dtype.name for dtype in dataframe.dtypes],
        select_columns=SelectColumns(
            default_selection=["model", "basari", "toplam_sure"],
            label="Select Columns to Display",
        ),
        search_columns=["model"],
        filter_columns=[
            ColumnFilter("family", type="checkboxgroup", label="Model Family"),
            ColumnFilter("quantization_level", type="checkboxgroup", label="Quantization Level"),
        ],
    )
# --- Dataset refresh -------------------------------------------------------
def restart_space():
    """Re-download the queue and results dataset snapshots from the Hub."""
    targets = (
        (QUEUE_REPO, "queue_cache"),
        (RESULTS_REPO, "results_cache"),
    )
    for repo, cache_dir in targets:
        snapshot_download(
            repo_id=repo, local_dir=cache_dir, repo_type="dataset", token=TOKEN
        )
# Gradio app setup: three tabs — leaderboard, per-model responses, submission.
demo = gr.Blocks(css=".container { max-width: 1200px; margin: auto; }")
with demo:
    gr.HTML("<h1>πŸ† Turkish MMLU Leaderboard</h1>")
    gr.Markdown("Explore, evaluate, and compare AI model performance.")
    # Tabs for leaderboard, model responses, and submission
    with gr.Tabs() as tabs:
        with gr.TabItem("Leaderboard"):
            gr.Markdown("### Explore Leaderboard")
            # Raises ValueError at startup if the leaderboard data is empty.
            leaderboard = init_leaderboard(leaderboard_data)
        with gr.TabItem("Model Responses"):
            gr.Markdown("### Model Responses")
            model_dropdown = gr.Dropdown(
                choices=leaderboard_data["model"].unique().tolist(), label="Select Model"
            )
            query_input = gr.Textbox(label="Search Questions")
            responses_table = gr.DataFrame()
            # NOTE(review): the selected model is passed to the handler but
            # never used in the filter — rows from every model that match the
            # query are returned. Also, Series.str.contains treats the query
            # as a regex and propagates NaN for missing "bolum" values;
            # consider .str.contains(query, regex=False, na=False) plus a
            # model filter. Left unchanged pending confirmation of the
            # responses dataset schema.
            gr.Button("Search").click(
                lambda model, query: model_responses_data[model_responses_data["bolum"].str.contains(query)],
                inputs=[model_dropdown, query_input],
                outputs=responses_table,
            )
        with gr.TabItem("Submit Model"):
            gr.Markdown("### Submit Your Model for Evaluation")
            model_name = gr.Textbox(label="Model Name")
            revision = gr.Textbox(label="Revision", placeholder="main")
            precision = gr.Dropdown(
                choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
            )
            submit_button = gr.Button("Submit")
            submission_result = gr.Markdown()
            # NOTE(review): submission is a stub — it only echoes the inputs
            # back to the user; nothing is written to QUEUE_REPO. Confirm
            # whether real submission logic is still pending.
            submit_button.click(
                lambda name, rev, prec: f"Submitted {name} with revision {rev} and precision {prec}.",
                inputs=[model_name, revision, precision],
                outputs=submission_result,
            )
# --- Background refresh and app launch -------------------------------------
# Re-download the queue/results snapshots every 30 minutes while the Space
# is running.
scheduler = BackgroundScheduler()
scheduler.add_job(restart_space, trigger="interval", seconds=1800)
scheduler.start()

# Enable request queueing (max 40 pending) before serving the app.
app = demo.queue(max_size=40)
app.launch()