File size: 1,117 Bytes
f1d79c0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
import os
from pathlib import Path

import pandas as pd
import streamlit as st
import utils as ut

# Configure the page to use Streamlit's "wide" layout.
st.set_page_config(layout="wide")

# Static header content, rendered top to bottom as separate markdown
# elements: the page title, a one-sentence description, and a notes list.
_HEADER_SECTIONS = (
    "# Elo Rating of Models",
    """This app shows the Elo rating of models on the H4 Hub based on their performance on the H4 eval dataset. """,
    """**Notes**
* This is currently using synthetic data
* You can tweak the number of tasks, models, and human rating per task to generate different datasets
""",
)

for _section in _HEADER_SECTIONS:
    st.markdown(_section)
# User input: three numeric widgets that size the synthetic dataset.
# Every widget has a lower bound of 1, its own upper bound, and a default.

# How many tasks to generate (1..5000, default 100).
num_tasks = st.number_input(
    label="Number of tasks",
    value=100,
    min_value=1,
    max_value=5000,
)

# How many models to generate (1..100, default 4).
num_models = st.number_input(
    label="Number of models",
    value=4,
    min_value=1,
    max_value=100,
)

# How many human ratings each task receives (1..10, default 3).
num_human_ratings = st.number_input(
    label="Number of human ratings per task",
    value=3,
    min_value=1,
    max_value=10,
)

# Trigger button: the leaderboard pipeline below only runs when the button
# reports a click for the current script run.
button = st.button("Show me the leaderboard!")

# Plain truthiness test rather than `is True`: PEP 8 advises against
# comparing boolean values to True/False with `is` or `==`.
if button:
    # Generate synthetic data sized by the three user-supplied values.
    df = ut.create_synthetic_data(
        n_tasks=num_tasks,
        n_models=num_models,
        n_ratings=num_human_ratings,
    )
    # Calculate the Elo rating from the generated data.
    elo_df = ut.calculate_elo_rating(df)
    # Show the leaderboard.
    ut.display_leaderboard(elo_df)