Add configuration and data management for Gradio app, implement filtering, response search, and section results plotting functionalities
Browse files- app.py +117 -102
- app_e.py +115 -0
- config.py +33 -0
- data_manager.py +59 -0
- utils.py +67 -0
app.py
CHANGED
@@ -1,115 +1,130 @@
|
|
1 |
import gradio as gr
|
2 |
from apscheduler.schedulers.background import BackgroundScheduler
|
3 |
-
from
|
4 |
-
import
|
5 |
-
import matplotlib.pyplot as plt
|
6 |
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
|
11 |
|
12 |
-
|
13 |
-
|
14 |
-
leaderboard_data = pd.read_parquet(LEADERBOARD_PATH)
|
15 |
-
model_responses_data = pd.read_parquet(RESPONSES_PATH)
|
16 |
-
section_results_data = pd.read_parquet(SECTION_RESULTS_PATH)
|
17 |
-
except Exception as e:
|
18 |
-
print(f"Error loading datasets: {e}")
|
19 |
-
raise
|
20 |
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
df = df[df["quantization_level"] == quantization_level]
|
28 |
-
return df
|
29 |
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
|
|
47 |
|
48 |
-
#
|
49 |
-
with gr.
|
50 |
-
|
51 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
-
|
54 |
-
# Leaderboard Tab
|
55 |
-
with gr.TabItem("Leaderboard"):
|
56 |
-
family_filter = gr.Dropdown(
|
57 |
-
choices=leaderboard_data["family"].unique().tolist(), label="Filter by Family", multiselect=False
|
58 |
-
)
|
59 |
-
quantization_filter = gr.Dropdown(
|
60 |
-
choices=leaderboard_data["quantization_level"].unique().tolist(), label="Filter by Quantization Level"
|
61 |
-
)
|
62 |
-
leaderboard_table = gr.DataFrame(leaderboard_data)
|
63 |
-
gr.Button("Apply Filters").click(
|
64 |
-
filter_leaderboard, inputs=[family_filter, quantization_filter], outputs=leaderboard_table
|
65 |
-
)
|
66 |
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
)
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
gr.DataFrame(section_results_data)
|
82 |
|
83 |
-
|
84 |
-
|
85 |
-
gr.Markdown("### Submit Your Model for Evaluation")
|
86 |
-
model_name = gr.Textbox(label="Model Name")
|
87 |
-
base_model = gr.Textbox(label="Base Model")
|
88 |
-
revision = gr.Textbox(label="Revision", placeholder="main")
|
89 |
-
precision = gr.Dropdown(
|
90 |
-
choices=["float16", "int8", "bfloat16", "float32"], label="Precision", value="float16"
|
91 |
-
)
|
92 |
-
weight_type = gr.Dropdown(
|
93 |
-
choices=["Original", "Delta", "Adapter"], label="Weight Type", value="Original"
|
94 |
-
)
|
95 |
-
model_type = gr.Dropdown(
|
96 |
-
choices=["Transformer", "RNN", "GPT", "Other"], label="Model Type", value="Transformer"
|
97 |
-
)
|
98 |
-
submit_button = gr.Button("Submit")
|
99 |
-
submission_output = gr.Markdown()
|
100 |
-
submit_button.click(
|
101 |
-
add_new_model,
|
102 |
-
inputs=[model_name, base_model, revision, precision, weight_type, model_type],
|
103 |
-
outputs=submission_output,
|
104 |
-
)
|
105 |
-
|
106 |
-
# Scheduler for refreshing datasets
|
107 |
-
scheduler = BackgroundScheduler()
|
108 |
-
scheduler.add_job(
|
109 |
-
lambda: snapshot_download(repo_id="alibayram", repo_type="dataset", local_dir="cache"),
|
110 |
-
"interval", seconds=1800
|
111 |
-
)
|
112 |
-
scheduler.start()
|
113 |
-
|
114 |
-
# Launch app
|
115 |
-
app.queue(default_concurrency_limit=40).launch()
|
|
|
1 |
import gradio as gr
|
2 |
from apscheduler.schedulers.background import BackgroundScheduler
|
3 |
+
from typing import Optional
|
4 |
+
import logging
|
|
|
5 |
|
6 |
+
from config import CONFIG
|
7 |
+
from data_manager import data_manager
|
8 |
+
from utils import filter_leaderboard, search_responses, plot_section_results, validate_model_submission
|
|
|
9 |
|
10 |
+
logging.basicConfig(level=logging.INFO)
|
11 |
+
logger = logging.getLogger(__name__)
|
|
|
|
|
|
|
|
|
|
|
|
|
12 |
|
13 |
+
def create_app() -> gr.Blocks:
    """Build the Gradio Blocks UI and return it without launching it.

    The layout has four tabs: a filterable leaderboard table, a model-response
    search, the section-results plot and table, and a model submission form.
    Dropdown choices and initial table values are read from ``data_manager``
    while the layout is being constructed.

    Returns:
        The assembled ``gr.Blocks`` application.
    """
    ui = CONFIG["ui"]
    model_options = CONFIG["model"]

    def handle_submission(*args):
        # Forward the form values to the validator and turn its verdict into
        # the Markdown message shown under the submit button.
        is_valid, message = validate_model_submission(*args)
        return "✅ Model submitted successfully!" if is_valid else f"❌ {message}"

    with gr.Blocks(css=ui.css, theme=ui.theme) as app:
        gr.HTML(f"<h1>{ui.title}</h1>")
        gr.Markdown(ui.description)

        with gr.Tabs():
            # Leaderboard tab: two dropdown filters feeding a read-only table.
            with gr.TabItem("📊 Leaderboard"):
                with gr.Row():
                    leaderboard = data_manager.leaderboard_data
                    family_filter = gr.Dropdown(
                        choices=leaderboard["family"].unique().tolist(),
                        label="Filter by Family",
                        multiselect=False,
                    )
                    quantization_filter = gr.Dropdown(
                        choices=leaderboard["quantization_level"].unique().tolist(),
                        label="Filter by Quantization Level",
                    )

                filter_btn = gr.Button("Apply Filters", variant="primary")
                leaderboard_table = gr.DataFrame(
                    value=data_manager.leaderboard_data,
                    interactive=False,
                )
                filter_btn.click(
                    filter_leaderboard,
                    inputs=[family_filter, quantization_filter],
                    outputs=leaderboard_table,
                )

            # Model responses tab: pick a model, type a query, show matches.
            with gr.TabItem("🔍 Model Responses"):
                with gr.Row():
                    model_dropdown = gr.Dropdown(
                        choices=data_manager.leaderboard_data["model"].unique().tolist(),
                        label="Select Model",
                    )
                    query_input = gr.Textbox(
                        label="Search Query",
                        placeholder="Enter search terms...",
                    )

                search_btn = gr.Button("Search", variant="primary")
                responses_table = gr.DataFrame()
                search_btn.click(
                    search_responses,
                    inputs=[query_input, model_dropdown],
                    outputs=responses_table,
                )

            # Section results tab: the plot is passed as a callable, the table
            # as the data frame currently held by the data manager.
            with gr.TabItem("📈 Section Results"):
                gr.Plot(value=plot_section_results)
                gr.DataFrame(value=data_manager.section_results_data)

            # Submit model tab: free-text identifiers plus three dropdowns
            # whose options come from the model section of CONFIG.
            with gr.TabItem("➕ Submit Model"):
                gr.Markdown("### Submit Your Model for Evaluation")

                with gr.Group():
                    model_name = gr.Textbox(label="Model Name", placeholder="Enter unique model name")
                    base_model = gr.Textbox(label="Base Model", placeholder="Enter base model name")
                    revision = gr.Textbox(label="Revision", value="main")

                    with gr.Row():
                        precision = gr.Dropdown(
                            choices=model_options.precision_options,
                            label="Precision",
                            value="float16",
                        )
                        weight_type = gr.Dropdown(
                            choices=model_options.weight_types,
                            label="Weight Type",
                            value="Original",
                        )
                        model_type = gr.Dropdown(
                            choices=model_options.model_types,
                            label="Model Type",
                            value="Transformer",
                        )

                submit_btn = gr.Button("Submit Model", variant="primary")
                submission_output = gr.Markdown()
                submit_btn.click(
                    handle_submission,
                    inputs=[model_name, base_model, revision, precision, weight_type, model_type],
                    outputs=submission_output,
                )

    return app
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
114 |
|
115 |
+
def main():
    """Start the periodic dataset refresh job, then build and launch the app."""
    refresh_seconds = CONFIG["dataset"].refresh_interval

    # Background job that calls data_manager.refresh_datasets on a fixed interval.
    scheduler = BackgroundScheduler()
    scheduler.add_job(data_manager.refresh_datasets, "interval", seconds=refresh_seconds)
    scheduler.start()

    # Build the UI, enable request queuing, and start serving.
    create_app().queue(default_concurrency_limit=40).launch()
|
|
|
128 |
|
129 |
+
if __name__ == "__main__":
|
130 |
+
main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
app_e.py
ADDED
@@ -0,0 +1,115 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import gradio as gr
|
2 |
+
from apscheduler.schedulers.background import BackgroundScheduler
|
3 |
+
from huggingface_hub import snapshot_download
|
4 |
+
import pandas as pd
|
5 |
+
import matplotlib.pyplot as plt
|
6 |
+
|
7 |
+
# Dataset paths
|
8 |
+
# Parquet files that back the three data views of the app.
LEADERBOARD_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
RESPONSES_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
SECTION_RESULTS_PATH = "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"

# Read all three datasets at import time; a failure is printed and re-raised,
# so the module does not finish loading without its data.
try:
    leaderboard_data, model_responses_data, section_results_data = (
        pd.read_parquet(dataset_path)
        for dataset_path in (LEADERBOARD_PATH, RESPONSES_PATH, SECTION_RESULTS_PATH)
    )
except Exception as e:
    print(f"Error loading datasets: {e}")
    raise
|
20 |
+
|
21 |
+
# Helper functions
|
22 |
+
def filter_leaderboard(family=None, quantization_level=None):
    """Return leaderboard rows that match the selected filters.

    A falsy filter value (``None`` or an empty selection) leaves that column
    unfiltered. The module-level ``leaderboard_data`` frame is copied first,
    so it is never modified.
    """
    selected = leaderboard_data.copy()
    for column, wanted in (("family", family), ("quantization_level", quantization_level)):
        if wanted:
            selected = selected[selected[column] == wanted]
    return selected
|
29 |
+
|
30 |
+
def search_responses(query, model):
    """Return response rows whose ``bolum`` value contains ``query``.

    Args:
        query: Text to look for in the ``bolum`` column; matched literally and
            case-insensitively.
        model: Model name; its answers are read from the ``<model>_cevap`` column.

    Returns:
        A frame with the ``bolum``, ``soru``, ``cevap`` and ``<model>_cevap``
        columns of the matching rows, or an empty frame when either input is
        missing.
    """
    # Nothing selected or typed yet: there is nothing to search for.
    if not query or not model:
        return pd.DataFrame()

    # regex=False keeps characters such as "(" or "*" from being parsed as a
    # pattern; na=False treats missing section names as non-matching instead
    # of producing a NaN mask that cannot be used for indexing.
    matches = model_responses_data["bolum"].str.contains(
        query, case=False, na=False, regex=False
    )
    selected_columns = ["bolum", "soru", "cevap", model + "_cevap"]
    return model_responses_data.loc[matches, selected_columns]
|
34 |
+
|
35 |
+
def plot_section_results():
    """Build a bar chart of the mean of every numeric section-results column.

    Returns:
        The matplotlib figure holding the chart.
    """
    fig, ax = plt.subplots(figsize=(10, 6))
    try:
        avg_scores = section_results_data.mean(numeric_only=True)
        avg_scores.plot(kind="bar", ax=ax)
        ax.set_title("Average Section-Wise Performance")
        ax.set_ylabel("Accuracy (%)")
        ax.set_xlabel("Sections")
    finally:
        # pyplot keeps a reference to every figure it creates. Deregister this
        # one so repeated calls do not accumulate figures; the returned object
        # itself stays usable for rendering.
        plt.close(fig)
    return fig
|
43 |
+
|
44 |
+
def add_new_model(model_name, base_model, revision, precision, weight_type, model_type):
    """Return a confirmation message for a submitted model.

    This is a placeholder: nothing is stored or queued, and only
    ``model_name`` is used to build the message.
    """
    confirmation = f"Model '{model_name}' submitted successfully!"
    return confirmation
|
47 |
+
|
48 |
+
# Gradio app structure
|
49 |
+
with gr.Blocks(css=".container { max-width: 1200px; margin: auto; }") as app:
    gr.HTML("<h1>🏆 Turkish MMLU Leaderboard</h1>")
    gr.Markdown("Explore, evaluate, and compare AI model performance.")

    with gr.Tabs() as tabs:
        # Leaderboard tab: two dropdown filters and the table they update.
        with gr.TabItem("Leaderboard"):
            family_filter = gr.Dropdown(
                choices=leaderboard_data["family"].unique().tolist(),
                label="Filter by Family",
                multiselect=False,
            )
            quantization_filter = gr.Dropdown(
                choices=leaderboard_data["quantization_level"].unique().tolist(),
                label="Filter by Quantization Level",
            )
            leaderboard_table = gr.DataFrame(leaderboard_data)
            apply_filters_button = gr.Button("Apply Filters")
            apply_filters_button.click(
                filter_leaderboard,
                inputs=[family_filter, quantization_filter],
                outputs=leaderboard_table,
            )

        # Model responses tab: choose a model, enter a query, list the matches.
        with gr.TabItem("Model Responses"):
            model_dropdown = gr.Dropdown(
                choices=leaderboard_data["model"].unique().tolist(),
                label="Select Model",
            )
            query_input = gr.Textbox(label="Search Query")
            responses_table = gr.DataFrame()
            search_button = gr.Button("Search")
            search_button.click(
                search_responses,
                inputs=[query_input, model_dropdown],
                outputs=responses_table,
            )

        # Section results tab: the plot is given as a callable, the table as data.
        with gr.TabItem("Section Results"):
            gr.Plot(plot_section_results)
            gr.DataFrame(section_results_data)

        # Submit model tab: form fields wired to add_new_model.
        with gr.TabItem("Submit Model"):
            gr.Markdown("### Submit Your Model for Evaluation")
            model_name = gr.Textbox(label="Model Name")
            base_model = gr.Textbox(label="Base Model")
            revision = gr.Textbox(label="Revision", placeholder="main")
            precision = gr.Dropdown(
                choices=["float16", "int8", "bfloat16", "float32"],
                label="Precision",
                value="float16",
            )
            weight_type = gr.Dropdown(
                choices=["Original", "Delta", "Adapter"],
                label="Weight Type",
                value="Original",
            )
            model_type = gr.Dropdown(
                choices=["Transformer", "RNN", "GPT", "Other"],
                label="Model Type",
                value="Transformer",
            )
            submit_button = gr.Button("Submit")
            submission_output = gr.Markdown()
            submit_button.click(
                add_new_model,
                inputs=[model_name, base_model, revision, precision, weight_type, model_type],
                outputs=submission_output,
            )
|
105 |
+
|
106 |
+
# Scheduler for refreshing datasets
|
107 |
+
def _download_snapshot():
    """Download the dataset snapshot into the local ``cache`` directory."""
    # NOTE(review): this job only downloads files; nothing in this script
    # re-reads them into leaderboard_data / model_responses_data /
    # section_results_data, so the tables shown in the UI presumably stay as
    # loaded at import time. Also confirm that "alibayram" is a valid dataset
    # repo id - the dataset paths above use "alibayram/<dataset name>".
    return snapshot_download(repo_id="alibayram", repo_type="dataset", local_dir="cache")


# Run the download every 1800 seconds in a background thread.
scheduler = BackgroundScheduler()
scheduler.add_job(_download_snapshot, "interval", seconds=1800)
scheduler.start()

# Enable request queuing and start serving the app.
app.queue(default_concurrency_limit=40).launch()
|
config.py
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from dataclasses import dataclass
|
2 |
+
from typing import Dict, List
|
3 |
+
|
4 |
+
@dataclass
class DatasetConfig:
    """Locations of the three parquet datasets plus cache/refresh settings."""

    # Parquet file behind the leaderboard table.
    leaderboard_path: str = (
        "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_liderlik_tablosu/data/train-00000-of-00001.parquet"
    )
    # Parquet file with the per-question model responses.
    responses_path: str = (
        "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_model_cevaplari/data/train-00000-of-00001.parquet"
    )
    # Parquet file with the per-section results.
    section_results_path: str = (
        "hf://datasets/alibayram/yapay_zeka_turkce_mmlu_bolum_sonuclari/data/train-00000-of-00001.parquet"
    )
    # Local directory handed to the snapshot download.
    cache_dir: str = "cache"
    # Seconds between two refresh runs (30 minutes).
    refresh_interval: int = 30 * 60
|
11 |
+
|
12 |
+
@dataclass
class UIConfig:
    """Text, theme name and custom CSS used when building the Gradio UI."""

    # Page heading.
    title: str = "🏆 Turkish MMLU Leaderboard"
    # Short introduction shown below the heading.
    description: str = "Explore, evaluate, and compare AI model performance."
    # Theme name passed to gr.Blocks.
    theme: str = "default"
    # Stylesheet passed to gr.Blocks.
    css: str = """
    .container { max-width: 1200px; margin: auto; padding: 20px; }
    .gr-button { min-width: 150px; }
    .gr-box { border-radius: 8px; }
    """
|
22 |
+
|
23 |
+
@dataclass
class ModelConfig:
    """Option lists offered by the dropdowns of the model submission form."""

    # NOTE(review): the fields are annotated List[str] but default to tuples;
    # confirm which of the two is intended before tightening the annotations.
    precision_options: List[str] = (
        "float16",
        "int8",
        "bfloat16",
        "float32",
    )
    weight_types: List[str] = (
        "Original",
        "Delta",
        "Adapter",
    )
    model_types: List[str] = (
        "Transformer",
        "RNN",
        "GPT",
        "Other",
    )
|
28 |
+
|
29 |
+
# Settings registry: one default-constructed config object per section name.
CONFIG = dict(
    dataset=DatasetConfig(),
    ui=UIConfig(),
    model=ModelConfig(),
)
|
data_manager.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional, Dict
|
2 |
+
import pandas as pd
|
3 |
+
from functools import lru_cache
|
4 |
+
from huggingface_hub import snapshot_download
|
5 |
+
import logging
|
6 |
+
from config import CONFIG
|
7 |
+
|
8 |
+
logging.basicConfig(level=logging.INFO)
|
9 |
+
logger = logging.getLogger(__name__)
|
10 |
+
|
11 |
+
class DataManager:
    """Load the three benchmark datasets on demand and keep them in memory.

    Each dataset is read from its configured parquet path the first time its
    property is accessed and is then served from an instance attribute.
    ``refresh_datasets`` replaces those attributes with freshly read frames.
    """

    def __init__(self):
        # None means "not loaded yet"; the properties fill these in lazily.
        self._leaderboard_data: Optional[pd.DataFrame] = None
        self._responses_data: Optional[pd.DataFrame] = None
        self._section_results_data: Optional[pd.DataFrame] = None

    def _load_dataset(self, path: str) -> pd.DataFrame:
        """Read one parquet file into a data frame.

        Args:
            path: Location of the parquet file.

        Raises:
            RuntimeError: If the file cannot be read; the original exception
                is chained as the cause.
        """
        # Deliberately not wrapped in functools.lru_cache: a cache on a method
        # is keyed on ``self`` and keeps the instance alive, and the loaded
        # frames are already cached in the instance attributes.
        try:
            return pd.read_parquet(path)
        except Exception as e:
            logger.error(f"Error loading dataset from {path}: {e}")
            raise RuntimeError(f"Failed to load dataset: {e}") from e

    def refresh_datasets(self) -> None:
        """Download the snapshot, then reload all three datasets.

        Any failure is logged and the previously loaded frames stay in place,
        so a failed refresh never leaves the manager without data.
        """
        dataset_config = CONFIG["dataset"]
        try:
            # NOTE(review): "alibayram" looks like an account name rather than
            # a full dataset repo id (the configured paths use
            # "alibayram/<dataset name>") - confirm this call can succeed.
            snapshot_download(
                repo_id="alibayram",
                repo_type="dataset",
                local_dir=dataset_config.cache_dir
            )
            # Read everything into locals first so that the three attributes
            # are only replaced once every dataset has loaded.
            leaderboard = self._load_dataset(dataset_config.leaderboard_path)
            responses = self._load_dataset(dataset_config.responses_path)
            section_results = self._load_dataset(dataset_config.section_results_path)
        except Exception as e:
            logger.error(f"Error refreshing datasets: {e}")
            return

        self._leaderboard_data = leaderboard
        self._responses_data = responses
        self._section_results_data = section_results
        logger.info("Datasets refreshed successfully")

    @property
    def leaderboard_data(self) -> pd.DataFrame:
        """Leaderboard frame, loaded on first access."""
        if self._leaderboard_data is None:
            self._leaderboard_data = self._load_dataset(CONFIG["dataset"].leaderboard_path)
        return self._leaderboard_data

    @property
    def responses_data(self) -> pd.DataFrame:
        """Model responses frame, loaded on first access."""
        if self._responses_data is None:
            self._responses_data = self._load_dataset(CONFIG["dataset"].responses_path)
        return self._responses_data

    @property
    def section_results_data(self) -> pd.DataFrame:
        """Section results frame, loaded on first access."""
        if self._section_results_data is None:
            self._section_results_data = self._load_dataset(CONFIG["dataset"].section_results_path)
        return self._section_results_data
|
57 |
+
|
58 |
+
# Global instance
|
59 |
+
data_manager = DataManager()
|
utils.py
ADDED
@@ -0,0 +1,67 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from typing import Optional, Dict
|
2 |
+
import pandas as pd
|
3 |
+
import matplotlib.pyplot as plt
|
4 |
+
from data_manager import data_manager
|
5 |
+
|
6 |
+
def filter_leaderboard(
    family: Optional[str] = None,
    quantization_level: Optional[str] = None
) -> pd.DataFrame:
    """Return the leaderboard rows matching the filters, highest score first.

    Args:
        family: Keep only rows whose ``family`` equals this value; a falsy
            value disables the filter.
        quantization_level: Keep only rows whose ``quantization_level`` equals
            this value; a falsy value disables the filter.

    Returns:
        The selected rows sorted by the ``score`` column in descending order.
    """
    table = data_manager.leaderboard_data.copy()

    criteria = {"family": family, "quantization_level": quantization_level}
    for column, wanted in criteria.items():
        if wanted:
            table = table[table[column] == wanted]

    return table.sort_values("score", ascending=False)
|
19 |
+
|
20 |
+
def search_responses(query: str, model: str) -> pd.DataFrame:
    """Return response rows whose ``bolum`` value contains ``query``.

    Args:
        query: Text to look for in the ``bolum`` column; matched literally and
            case-insensitively.
        model: Model name; its answers are read from the ``<model>_cevap`` column.

    Returns:
        The ``bolum``, ``soru``, ``cevap`` and ``<model>_cevap`` columns of the
        matching rows, with rows containing missing values dropped. The frame
        is empty when either input is missing or the model has no answer
        column.
    """
    if not query or not model:
        return pd.DataFrame()

    responses = data_manager.responses_data
    selected_columns = ["bolum", "soru", "cevap", f"{model}_cevap"]

    # The model name is chosen from leaderboard values, which are not
    # guaranteed to have a matching answer column in the responses frame;
    # show an empty table instead of failing with a KeyError.
    if selected_columns[-1] not in responses.columns:
        return pd.DataFrame(columns=selected_columns)

    # regex=False: the query is user-typed text, so characters such as "(" or
    # "*" must be matched literally rather than parsed as a pattern.
    matches = responses["bolum"].str.contains(query, case=False, na=False, regex=False)
    return responses.loc[matches, selected_columns].dropna()
|
31 |
+
|
32 |
+
def plot_section_results() -> plt.Figure:
    """Build a labelled bar chart of the mean of every numeric section column.

    Returns:
        The matplotlib figure holding the chart.
    """
    fig, ax = plt.subplots(figsize=(12, 6))
    try:
        avg_scores = data_manager.section_results_data.mean(numeric_only=True)
        avg_scores.plot(kind="bar", ax=ax)

        # Customize plot
        ax.set_title("Average Section-Wise Performance", pad=20)
        ax.set_ylabel("Accuracy (%)")
        ax.set_xlabel("Sections")
        # Style the tick labels of this axes explicitly instead of relying on
        # pyplot's notion of the "current" figure.
        plt.setp(ax.get_xticklabels(), rotation=45, ha='right')

        # Add value labels
        for i, v in enumerate(avg_scores):
            ax.text(i, v, f'{v:.1f}%', ha='center', va='bottom')

        # Lay out last so the value labels are taken into account.
        fig.tight_layout()
    finally:
        # pyplot keeps a reference to every figure it creates. Deregister this
        # one so repeated calls do not accumulate figures; the returned object
        # itself stays usable for rendering.
        plt.close(fig)

    return fig
|
51 |
+
|
52 |
+
def validate_model_submission(
    model_name: str,
    base_model: str,
    revision: str,
    precision: str,
    weight_type: str,
    model_type: str
) -> tuple[bool, str]:
    """Check that a model submission is complete and not a duplicate.

    Only ``model_name`` and ``base_model`` are inspected; the remaining
    parameters are accepted so the function can be called with all form
    values.

    Returns:
        ``(True, message)`` when the submission is acceptable, otherwise
        ``(False, reason)``.
    """
    # Surrounding whitespace is ignored so that a blank-looking entry counts
    # as missing and "name " is recognised as a duplicate of "name".
    name = (model_name or "").strip()
    base = (base_model or "").strip()
    if not name or not base:
        return False, "Model name and base model are required."

    if name in data_manager.leaderboard_data["model"].values:
        return False, "Model name already exists."

    return True, "Validation successful"
|