# Streamlit app: Elo leaderboard of models built from synthetic rating data.
#
# The page collects three dataset-size parameters from the user and, once the
# button is pressed, delegates to the project-local ``utils`` module to
# generate the data, compute the Elo ratings and render the leaderboard.
import os
from pathlib import Path

import pandas as pd
import streamlit as st

import utils as ut

# Page setup must happen before any other Streamlit element is emitted.
st.set_page_config(layout="wide")

st.markdown("# Elo Rating of Models")
st.markdown(
    """This app shows the Elo rating of models on the H4 Hub based on their performance on the H4 eval dataset.
"""
)
st.markdown(
    """**Notes**
* This is currently using synthetic data
* You can tweak the number of tasks, models, and human rating per task to generate different datasets
"""
)

# User input: integer bounds/defaults keep the widgets returning ints.
num_tasks = st.number_input(
    "Number of tasks", min_value=1, max_value=5000, value=100
)
num_models = st.number_input(
    "Number of models", min_value=1, max_value=100, value=4
)
num_human_ratings = st.number_input(
    "Number of human ratings per task", min_value=1, max_value=10, value=3
)

button = st.button("Show me the leaderboard!")

# ``st.button`` returns a bool that is True only on the rerun triggered by the
# click, so a plain truthiness check is sufficient.
if button:
    # Generate synthetic data sized by the user's inputs.
    df = ut.create_synthetic_data(
        n_tasks=num_tasks,
        n_models=num_models,
        n_ratings=num_human_ratings,
    )

    # Calculate the Elo rating from the generated data.
    elo_df = ut.calculate_elo_rating(df)

    # Show the leaderboard.
    ut.display_leaderboard(elo_df)