"""Streamlit app that plots mean per-category scores of selected models on a polar chart."""

import json

import pandas as pd
import plotly.express as px
import streamlit as st

# Define categories
CATEGORIES = ["Writing", "Roleplay", "Reasoning", "Math", "Coding", "Extraction", "STEM", "Humanities"]

# Question ids map onto CATEGORIES in consecutive runs of ten, starting at 81.
FIRST_QUESTION_ID = 81
QUESTIONS_PER_CATEGORY = 10

# Replace with the actual path to your single-model JSONL file.
SINGLE_MODEL_PATH = "data/gpt-4_single.jsonl"


def _category_for(question_id: int) -> str:
    """Return the category name that *question_id* falls into.

    Raises:
        ValueError: if the id lies outside the range covered by CATEGORIES.
            Without this check an id below FIRST_QUESTION_ID would produce a
            negative index and silently pick a category from the end of the
            list.
    """
    index = (question_id - FIRST_QUESTION_ID) // QUESTIONS_PER_CATEGORY
    if not 0 <= index < len(CATEGORIES):
        last_id = FIRST_QUESTION_ID + QUESTIONS_PER_CATEGORY * len(CATEGORIES) - 1
        raise ValueError(
            f"question_id {question_id} is outside the supported range "
            f"{FIRST_QUESTION_ID}-{last_id}"
        )
    return CATEGORIES[index]


# Load and process the single model data
# st.cache_data (not st.cache_resource) is used because the result is a
# DataFrame: each caller gets its own copy instead of a shared mutable object.
@st.cache_data
def get_model_df(path: str = SINGLE_MODEL_PATH) -> pd.DataFrame:
    """Load the JSONL file at *path* into a DataFrame with a ``category`` column.

    Each non-blank line is parsed as one JSON object; its ``question_id`` is
    used to derive the ``category`` value.

    Raises:
        FileNotFoundError: if *path* does not exist.
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record has no ``question_id``.
        ValueError: if a ``question_id`` is outside the supported range.
    """
    records = []
    with open(path, "r", encoding="utf-8") as fin:
        for line in fin:
            # Tolerate blank lines (e.g. a trailing newline at end of file).
            if not line.strip():
                continue
            obj = json.loads(line)
            obj["category"] = _category_for(obj["question_id"])
            records.append(obj)
    return pd.DataFrame(records)


# Placeholder for the pair model data function
# Adapt this function based on how your "gpt-4_pair.jsonl" is structured
@st.cache_data
def get_model_df_pair() -> pd.DataFrame:
    """Return the pair-model data; currently an empty placeholder DataFrame."""
    # Implement similar to get_model_df if you have pair data
    return pd.DataFrame([])  # Placeholder


try:
    df = get_model_df()
except FileNotFoundError as err:
    # Show a readable message in the app instead of a raw traceback.
    st.error(f"Could not load model data: {err}")
    st.stop()
df_pair = get_model_df_pair()

# Streamlit app starts here
st.title('Model Performance Visualization')

if df.empty:
    # An empty file yields a DataFrame without a "model" column; stop early
    # rather than failing on the column lookup below.
    st.warning("No model data available to display.")
    st.stop()

# Select models to display
all_models = df["model"].unique().tolist()
selected_models = st.multiselect('Select Models', all_models, default=all_models[:3])

# Main app logic
if selected_models:
    scores_all = []
    for model in selected_models:
        for cat in CATEGORIES:
            # Rows with a negative score are excluded from the mean.
            res = df[(df["category"] == cat) & (df["model"] == model) & (df["score"] >= 0)]
            score = res["score"].mean()
            scores_all.append({"model": model, "category": cat, "score": score})

    df_score = pd.DataFrame(scores_all)

    # Renaming models for better visualization
    rename_map = {
        # Define your renaming map here, if needed
    }
    if rename_map:
        # Only touch the model column so category names are never rewritten.
        df_score["model"] = df_score["model"].replace(rename_map)

    # Generate the radial graph
    fig = px.line_polar(
        df_score,
        r='score',
        theta='category',
        line_close=True,
        category_orders={"category": CATEGORIES},
        color='model',
        markers=True,
    )

    # Display the Plotly figure in Streamlit
    st.plotly_chart(fig)