File size: 4,297 Bytes
3d13fcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1fdf681
3d13fcf
1fdf681
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
af6c470
 
 
 
 
6cf5862
af6c470
 
 
 
1fdf681
 
 
 
af6c470
1fdf681
 
 
af6c470
 
1fdf681
af6c470
 
1fdf681
 
3d13fcf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
import os
import gistyc
import requests
from dataclasses import dataclass
import re
import streamlit as st

@dataclass
class GistInfo:
    """One leaderboard entry: a benchmark gist plus the scores parsed from it."""

    # Where the result came from.
    gist_id: str       # identifier of the source gist
    filename: str      # name of the gist's markdown file
    url: str           # web URL of the gist

    # Which model the result belongs to.
    model_name: str    # link text of the model cell, or 'Unknown'
    model_id: str      # "<owner>/<model>" taken from the model link, or 'Unknown'
    model: str         # the raw model cell as it appears in the table

    # Benchmark scores.
    agieval: float
    gpt4all: float
    truthfulqa: float
    bigbench: float
    average: float     # aggregate score used to rank entries


def update_gist(content, gist_id, access_token, *,
                filename="YALL - Yet Another LLM Leaderboard.md", timeout=30):
    """
    Update the content of a GitHub Gist.

    Sends a PATCH request to the GitHub gists API and prints the outcome.
    Failures (network errors, non-200 responses) are reported on stdout
    rather than raised, so a failed update never aborts the caller.

    Args:
    content (str): The new content of the gist.
    gist_id (str): The ID of the gist to update.
    access_token (str): GitHub personal access token with gist permissions.
    filename (str): Name of the file inside the gist whose content is replaced.
    timeout (float): Seconds to wait for the GitHub API before giving up.
    """
    api_url = f"https://api.github.com/gists/{gist_id}"
    headers = {
        "Authorization": f"token {access_token}",
        "Accept": "application/vnd.github.v3+json",
    }
    data = {"files": {filename: {"content": content}}}

    try:
        # Without a timeout, requests waits indefinitely on a stalled connection.
        response = requests.patch(api_url, json=data, headers=headers, timeout=timeout)
    except requests.RequestException as exc:
        print("Failed to update gist. Request error:", exc)
        return

    if response.status_code == 200:
        print("Gist updated successfully.")
        return

    print("Failed to update gist. Status code:", response.status_code)
    try:
        details = response.json()
    except ValueError:
        # Error bodies are not guaranteed to be JSON; fall back to the raw text.
        details = response.text
    print("Response:", details)


# Markdown link to a Hugging Face model: [name](https://huggingface.co/<owner>/<model>).
_MODEL_LINK_RE = re.compile(r'\[([^\]]+)\]\(https://huggingface\.co/([^/]+)/([^)]+)\)')

# Upper bound, in seconds, on each raw-gist download so that one stalled
# request cannot hang the whole leaderboard build.
_RAW_FETCH_TIMEOUT = 30


def _parse_nous_row(markdown_text):
    """Parse the model and scores from the third line of a benchmark table.

    Args:
        markdown_text (str): Raw markdown content of a "-Nous.md" gist file.

    Returns:
        dict | None: Keyword arguments for ``GistInfo`` (model_name, model_id,
        model, agieval, gpt4all, truthfulqa, bigbench, average), or ``None``
        when the text has fewer than three lines or a score cell is missing
        or not a number.
    """
    lines = markdown_text.split('\n')
    if len(lines) < 3:
        return None

    # The row is read from the third line; presumably the first two lines are
    # the table header and its separator -- verify against the gist format.
    cells = [cell.strip() for cell in lines[2].split('|')[1:-1]]

    # Every cell access and conversion is guarded together, so a single
    # malformed gist is skipped instead of aborting the whole build.
    try:
        model_cell = cells[0]
        agieval = float(cells[1])
        gpt4all = float(cells[2])
        truthfulqa = float(cells[3])
        bigbench = float(cells[4])
    except (ValueError, IndexError) as e:
        print(f"Error processing values: {e}")
        return None

    link = _MODEL_LINK_RE.search(model_cell)
    if link:
        model_name = link.group(1)
        model_id = f"{link.group(2)}/{link.group(3)}"
    else:
        model_name = model_id = 'Unknown'

    # NOTE(review): TruthfulQA is left out of the average (three scores, not
    # four) -- kept as found; confirm this is intentional.
    average = round((agieval + gpt4all + bigbench) / 3, 2)

    return {
        "model_name": model_name,
        "model_id": model_id,
        "model": model_cell,
        "agieval": agieval,
        "gpt4all": gpt4all,
        "truthfulqa": truthfulqa,
        "bigbench": bigbench,
        "average": average,
    }


@st.cache_data
def create_yall():
    """Build the YALL leaderboard table and publish it to the leaderboard gist.

    Lists the gists visible to the token stored in the ``github`` environment
    variable, keeps those whose first file name ends with "-Nous.md",
    downloads each file, parses its scores, and renders all entries as a
    markdown table sorted by average (highest first). The table is then
    pushed to the YALL gist via ``update_gist``.

    Gists that cannot be downloaded or parsed are skipped.

    Returns:
        str: The rendered markdown table.
    """
    github_token = os.environ.get("github")

    # Retrieve all gists
    gist_api = gistyc.GISTyc(auth_token=github_token)
    gists = gist_api.get_gists()

    gist_infos = []
    for data_dict in gists:
        if 'files' not in data_dict or not data_dict['files']:
            continue

        # Only the first file of each gist is considered.
        file_info = next(iter(data_dict['files'].values()))
        filename = file_info['filename']
        if not filename.endswith("-Nous.md"):
            continue

        try:
            response = requests.get(file_info['raw_url'], timeout=_RAW_FETCH_TIMEOUT)
        except requests.RequestException as e:
            print(f"Error fetching {filename}: {e}")
            continue
        if response.status_code != 200 or "Error: File does not exist" in response.text:
            continue

        parsed = _parse_nous_row(response.text)
        if parsed is None:
            continue

        gist_infos.append(
            GistInfo(
                gist_id=data_dict['id'],
                filename=filename,
                url=data_dict['html_url'],
                **parsed,
            )
        )

    # Best average first; the sort is stable, so ties keep retrieval order.
    gist_infos.sort(key=lambda info: info.average, reverse=True)

    # Create markdown table
    header = (
        "| Model | Average | AGIEval | GPT4All | TruthfulQA | Bigbench |\n"
        "|---|---:|---:|---:|---:|---:|\n"
    )
    rows = [
        f"| [{gist.model_id}](https://huggingface.co/{gist.model_id}) [📄]({gist.url}) | {gist.average} | {gist.agieval} | {gist.gpt4all} | {gist.truthfulqa} | {gist.bigbench} |\n"
        for gist in gist_infos
    ]
    markdown_table = header + "".join(rows)

    # Update YALL's gist
    update_gist(content=markdown_table, gist_id="90294929a2dbcb8877f9696f28105fdf", access_token=github_token)

    return markdown_table