Spaces:

RVikas
/

MTBenchExplorer

Sleeping

App Files Files Community

MTBenchExplorer / app.py

RVikas

Update app.py

8f4e737 verified over 1 year ago

raw

history blame contribute delete

2.02 kB

	import streamlit as st
	import json
	import pandas as pd
	import plotly.express as px

	# Question categories. Order matters: get_model_df indexes into this list
	# to label each record, and the polar chart uses it as the axis order.
	CATEGORIES = [
	    "Writing",
	    "Roleplay",
	    "Reasoning",
	    "Math",
	    "Coding",
	    "Extraction",
	    "STEM",
	    "Humanities",
	]

	# Load and process the single-model judgment data
	@st.cache_data
	def get_model_df(path="data/gpt-4_single.jsonl"):
	    """Load judgment records from a JSONL file into a DataFrame.

	    Every non-blank line is parsed as one JSON object. Each object gains a
	    ``"category"`` key derived from its ``"question_id"``: ids start at 81
	    and each consecutive run of 10 ids maps to the next entry of
	    ``CATEGORIES``.

	    Args:
	        path: Location of the JSONL file. Defaults to the file this app
	            has always read, so existing zero-argument calls are unchanged.

	    Returns:
	        A DataFrame with one row per parsed record.

	    Raises:
	        ValueError: If a record's ``question_id`` maps outside
	            ``CATEGORIES``.
	    """
	    first_question_id = 81
	    questions_per_category = 10

	    records = []
	    # JSON text is UTF-8; do not depend on the platform's default encoding.
	    with open(path, "r", encoding="utf-8") as fin:
	        for line_no, line in enumerate(fin, start=1):
	            # Blank lines would make json.loads fail.
	            if not line.strip():
	                continue
	            obj = json.loads(line)
	            index = (obj["question_id"] - first_question_id) // questions_per_category
	            # Reject out-of-range ids explicitly: a negative index would
	            # otherwise wrap around and silently pick the wrong category.
	            if not 0 <= index < len(CATEGORIES):
	                raise ValueError(
	                    f"{path}:{line_no}: question_id {obj['question_id']!r} "
	                    "does not map to a known category"
	                )
	            obj["category"] = CATEGORIES[index]
	            records.append(obj)
	    return pd.DataFrame(records)

	# Placeholder for the pair model data function
	# Adapt this function based on how your "gpt-4_pair.jsonl" is structured
	@st.cache_data
	def get_model_df_pair():
	    """Return the pairwise-comparison data; currently an empty placeholder.

	    Returns:
	        An empty DataFrame. Implement along the lines of get_model_df once
	        pair data is available.
	    """
	    return pd.DataFrame([])  # Placeholder

	# Load both datasets up front
	df = get_model_df()
	df_pair = get_model_df_pair()

	# ---- Streamlit page ----
	st.title('Model Performance Visualization')

	# Model picker: every model found in the data is offered and the first
	# three are preselected
	all_models = df["model"].unique()
	selected_models = st.multiselect('Select Models', all_models, default=all_models[:3])

	# Nothing is drawn until at least one model is chosen
	if selected_models:
	    # Only rows with a non-negative score take part in the averages
	    valid = df[df["score"] >= 0]

	    # One row per (model, category) pair holding the mean score
	    score_rows = [
	        {
	            "model": model,
	            "category": cat,
	            "score": valid.loc[
	                (valid["category"] == cat) & (valid["model"] == model), "score"
	            ].mean(),
	        }
	        for model in selected_models
	        for cat in CATEGORIES
	    ]
	    df_score = pd.DataFrame(score_rows)

	    # Optional display names for models; empty means no renaming
	    rename_map = {
	        # Define your renaming map here, if needed
	    }
	    df_score = df_score.replace(rename_map)

	    # Radar chart: one closed line per model, categories in CATEGORIES order
	    fig = px.line_polar(
	        df_score,
	        r='score',
	        theta='category',
	        color='model',
	        line_close=True,
	        markers=True,
	        category_orders={"category": CATEGORIES},
	    )

	    # Render the Plotly figure in the Streamlit page
	    st.plotly_chart(fig)