File size: 2,144 Bytes
df32536
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65684de
df32536
 
 
 
 
698b772
df32536
 
 
 
 
 
698b772
df32536
 
 
 
65684de
 
 
 
 
df32536
65684de
df32536
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
import pandas as pd

# Size-bucket labels; these are the strings model_size_flag() looks for.
MODEL_SIZE = [
    '<4B',
    '4B-10B',
    '10B-20B',
    '20B-40B',
    '>40B',
    'Unknown',
]

# Model-category labels; these are the strings model_type_flag() looks for.
MODEL_TYPE = [
    'API',
    'Open Source',
]

# NOTE(review): presumably the placeholder text of a model-name search box --
# confirm against the UI code that consumes it.
search_default_val = 'Model name search (fuzzy, case insensitive)'

# CSV file that build_df() reads the leaderboard from.
data_filepath = 'leaderboard_data.csv'

def _name_cell(name, clickable, hf_path):
    """Return the 'Name' cell: a Hugging Face link when one can be built, else the bare name."""
    # A missing flag (NaN) is truthy, and a missing path would render as
    # ".../nan", so both must be ruled out before emitting a link.
    linkable = (
        not pd.isna(clickable)
        and bool(clickable)
        and not pd.isna(hf_path)
    )
    if not linkable:
        return name
    return f'<a href="https://huggingface.co/{hf_path}" target="_blank">{name}</a>'


def build_df(filepath=None):
    """Load the leaderboard CSV and shape it into a ranked display table.

    The CSV must provide the columns ``name``, ``clickable``, ``eval_date``
    and ``num_params``; ``hf_path`` is consulted for rows flagged clickable.
    Every column whose header starts with ``'SCORE '`` becomes a score column
    (prefix stripped, values rounded to 2 decimals); ``'SCORE overall'`` is
    shown as ``'⭐ Overall'`` and is the ranking key.

    Args:
        filepath: Path (or anything ``pd.read_csv`` accepts) of the CSV to
            load. Defaults to the module-level ``data_filepath``.

    Returns:
        A ``(df, type_map)`` tuple. ``df`` is sorted by ``'⭐ Overall'``
        descending with a 1-based ``'Rank'`` column first, plus the helper
        columns ``'_name'`` (raw name) and ``'_parameters'`` (raw parameter
        count). ``type_map`` maps each displayed column name to ``'number'``
        or ``'html'``.

    Raises:
        KeyError: If a required column, or the ``'SCORE overall'`` column
            used for ranking, is absent from the CSV.
    """
    raw_df = pd.read_csv(data_filepath if filepath is None else filepath)
    df = pd.DataFrame()
    df['_name'] = raw_df['name']

    # 'hf_path' is only needed for clickable rows, so tolerate its absence.
    if 'hf_path' in raw_df.columns:
        hf_paths = raw_df['hf_path']
    else:
        hf_paths = [None] * len(raw_df)
    df['Name'] = [
        _name_cell(name, clickable, hf_path)
        for name, clickable, hf_path in zip(raw_df['name'], raw_df['clickable'], hf_paths)
    ]

    df['Eval Date'] = raw_df['eval_date']
    df['Params (B)'] = [round(params / 1e9, 1) for params in raw_df['num_params']]
    df['_parameters'] = raw_df['num_params']

    type_map = {
        'Rank': 'number',
        'Name': 'html',
        'Eval Date': 'html',
        'Params (B)': 'number',
    }

    score_prefix = 'SCORE '
    for coln in raw_df.columns:
        if not coln.startswith(score_prefix):
            continue
        newc = coln[len(score_prefix):]
        if newc == 'overall':
            newc = '⭐ Overall'
        df[newc] = raw_df[coln].round(2)
        type_map[newc] = 'number'

    # Stable sort so models with tied overall scores are ranked deterministically.
    df.sort_values('⭐ Overall', ascending=False, inplace=True, kind='mergesort')
    df.reset_index(drop=True, inplace=True)
    df.insert(0, 'Rank', list(range(1, len(df) + 1)))

    return df, type_map


def model_size_flag(sz, FIELDS):
    """Tell whether a model of size ``sz`` falls in one of the selected buckets.

    Args:
        sz: Parameter count of the model; a missing value (as judged by
            ``pd.isna``) means the size is unknown.
        FIELDS: Container of selected bucket labels, e.g. ``'<4B'``,
            ``'4B-10B'``, ``'10B-20B'``, ``'20B-40B'``, ``'>40B'``, ``'Unknown'``.

    Returns:
        True when the bucket that ``sz`` belongs to is among ``FIELDS``
        (a missing size only matches ``'Unknown'``), otherwise False.
    """
    if pd.isna(sz):
        return 'Unknown' in FIELDS

    # Each bucket is half-open: lower bound inclusive, upper bound exclusive.
    buckets = (
        ('<4B', lambda count: count < 4e9),
        ('4B-10B', lambda count: 4e9 <= count < 1e10),
        ('10B-20B', lambda count: 1e10 <= count < 2e10),
        ('20B-40B', lambda count: 2e10 <= count < 4e10),
        ('>40B', lambda count: count >= 4e10),
    )
    # The label test comes first so sizes are only compared for selected buckets.
    return any(label in FIELDS and contains(sz) for label, contains in buckets)


def model_type_flag(line, FIELDS):
    """Tell whether a leaderboard row matches the selected model types.

    A row whose ``'_parameters'`` entry is missing (per ``pd.isna``) is
    matched by ``'API'``; a row with a parameter count is matched by
    ``'Open Source'``.

    Args:
        line: Row-like mapping exposing a ``'_parameters'`` entry.
        FIELDS: Container of selected type labels.

    Returns:
        True when the row's category is among ``FIELDS``, otherwise False.
    """
    def params_missing():
        # Looked up lazily: the row is only read when a relevant label is selected.
        return pd.isna(line['_parameters'])

    return bool(
        ('Open Source' in FIELDS and not params_missing())
        or ('API' in FIELDS and params_missing())
    )