# Spaces: Sleeping (page-header residue from the hosting site; not part of the program)
import json

import pandas as pd
import plotly.express as px
import streamlit as st
# Define categories
# Order matters: get_model_df maps question ids onto this list by position.
CATEGORIES = ["Writing", "Roleplay", "Reasoning", "Math", "Coding", "Extraction", "STEM", "Humanities"]


# Load and process the single model data
def get_model_df(path="data/gpt-4_single.jsonl"):
    """Load single-model judgment records from a JSONL file into a DataFrame.

    Every non-blank line is parsed as one JSON object. Each object must have
    a ``question_id``; a ``category`` key is added to it, derived from that
    id: ids start at 81 and every block of ten consecutive ids belongs to the
    next entry of ``CATEGORIES`` (81-90 -> "Writing", 91-100 -> "Roleplay",
    and so on).

    Args:
        path: Location of the JSONL file. Defaults to the file the app ships
            with, so existing zero-argument calls behave as before.

    Returns:
        A ``pandas.DataFrame`` with one row per record and the extra
        ``category`` column.

    Raises:
        IndexError: If a ``question_id`` does not fall inside any category.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If the file cannot be opened.
    """
    records = []
    # Explicit encoding so the result does not depend on the platform locale.
    with open(path, "r", encoding="utf-8") as fin:
        for line_no, line in enumerate(fin, start=1):
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            obj = json.loads(line)
            index = (obj["question_id"] - 81) // 10
            # A negative index would silently wrap around to a category at
            # the end of the list, so reject it as loudly as a too-large one.
            if not 0 <= index < len(CATEGORIES):
                raise IndexError(
                    f"{path}:{line_no}: question_id {obj['question_id']!r} "
                    f"is outside the supported range 81-{80 + 10 * len(CATEGORIES)}"
                )
            obj["category"] = CATEGORIES[index]
            records.append(obj)
    return pd.DataFrame(records)
# Placeholder for the pair model data function
# Adapt this function based on how your "gpt-4_pair.jsonl" is structured
def get_model_df_pair() -> pd.DataFrame:
    """Return the pairwise-comparison data; currently an empty placeholder.

    No file is read yet. Implement this along the lines of ``get_model_df``
    once pair data is available.

    Returns:
        An empty ``pandas.DataFrame`` (no rows, no columns).
    """
    return pd.DataFrame([])  # Placeholder
# Load the data for this script run. df_pair comes from a placeholder loader
# and is not used by the chart below.
df = get_model_df()
df_pair = get_model_df_pair()

# Streamlit app starts here
st.title('Model Performance Visualization')

# Select models to display (the first three found in the data are preselected)
all_models = df["model"].unique()
selected_models = st.multiselect('Select Models', all_models, default=all_models[:3])

# Main app logic
if selected_models:
    # Mean score per (model, category) in a single pass. Rows with a negative
    # score are excluded -- presumably a sentinel for failed judgments; verify
    # against the data producer.
    valid_rows = df[df["model"].isin(selected_models) & (df["score"] >= 0)]
    mean_scores = valid_rows.groupby(["model", "category"])["score"].mean()

    # Reindex over every selected model x category pair so that combinations
    # without data still appear (as NaN) and rows follow the selection order
    # and the CATEGORIES order.
    full_index = pd.MultiIndex.from_product(
        [selected_models, CATEGORIES], names=["model", "category"]
    )
    df_score = mean_scores.reindex(full_index).reset_index()

    # Renaming models for better visualization
    rename_map = {
        # Define your renaming map here, if needed
    }
    # Only touch the model column so a rename can never alter category or
    # score values, and skip the call entirely while the map is empty.
    if rename_map:
        df_score["model"] = df_score["model"].replace(rename_map)

    # Generate the radial graph
    fig = px.line_polar(df_score, r='score', theta='category', line_close=True,
                        category_orders={"category": CATEGORIES}, color='model', markers=True)

    # Display the Plotly figure in Streamlit
    st.plotly_chart(fig)
else:
    # Without this the page would be blank below the selector.
    st.info('Select at least one model to display the chart.')