Spaces:
Running
Running
Update leaderboard.py
Browse files- leaderboard.py +38 -14
leaderboard.py
CHANGED
@@ -108,41 +108,64 @@ def get_human_readable_name(model_name: str) -> str:
|
|
108 |
|
109 |
def get_leaderboard():
|
110 |
leaderboard = load_leaderboard()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
111 |
sorted_results = sorted(
|
112 |
leaderboard.items(),
|
113 |
-
key=lambda x: (x[1]["
|
114 |
reverse=True
|
115 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
116 |
|
117 |
-
leaderboard_html = """
|
|
|
118 |
<style>
|
119 |
-
.leaderboard-table {
|
120 |
width: 100%;
|
121 |
border-collapse: collapse;
|
122 |
font-family: Arial, sans-serif;
|
123 |
-
}
|
124 |
-
.leaderboard-table th, .leaderboard-table td {
|
125 |
border: 1px solid #ddd;
|
126 |
padding: 8px;
|
127 |
text-align: left;
|
128 |
-
}
|
129 |
-
.leaderboard-table th {
|
130 |
background-color: rgba(255, 255, 255, 0.1);
|
131 |
font-weight: bold;
|
132 |
-
}
|
133 |
-
.rank-column {
|
134 |
width: 60px;
|
135 |
text-align: center;
|
136 |
-
}
|
137 |
-
.opponent-details {
|
138 |
font-size: 0.9em;
|
139 |
color: #888;
|
140 |
-
}
|
141 |
</style>
|
142 |
<table class='leaderboard-table'>
|
143 |
<tr>
|
144 |
<th class='rank-column'>Rank</th>
|
145 |
<th>Model</th>
|
|
|
146 |
<th>Wins</th>
|
147 |
<th>Losses</th>
|
148 |
<th>Win Rate</th>
|
@@ -170,6 +193,7 @@ def get_leaderboard():
|
|
170 |
<tr>
|
171 |
<td class='rank-column'>{rank_display}</td>
|
172 |
<td>{get_human_readable_name(model)}</td>
|
|
|
173 |
<td>{results['wins']}</td>
|
174 |
<td>{results['losses']}</td>
|
175 |
<td>{win_rate:.2f}%</td>
|
@@ -189,7 +213,7 @@ def get_elo_leaderboard():
|
|
189 |
min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
|
190 |
max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
|
191 |
|
192 |
-
|
193 |
<p style="font-size: 16px; margin-bottom: 20px;">
|
194 |
This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
|
195 |
Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
|
@@ -200,7 +224,7 @@ def get_elo_leaderboard():
|
|
200 |
"""
|
201 |
|
202 |
leaderboard_html = f"""
|
203 |
-
{
|
204 |
<style>
|
205 |
.elo-leaderboard-table {{
|
206 |
width: 100%;
|
|
|
108 |
|
109 |
def get_leaderboard():
|
110 |
leaderboard = load_leaderboard()
|
111 |
+
|
112 |
+
# Calculate scores for each model
|
113 |
+
for model, results in leaderboard.items():
|
114 |
+
total_battles = results["wins"] + results["losses"]
|
115 |
+
if total_battles > 0:
|
116 |
+
win_rate = results["wins"] / total_battles
|
117 |
+
results["score"] = win_rate * (1 - 1 / (total_battles + 1))
|
118 |
+
else:
|
119 |
+
results["score"] = 0
|
120 |
+
|
121 |
+
# Sort results by score, then by total battles
|
122 |
sorted_results = sorted(
|
123 |
leaderboard.items(),
|
124 |
+
key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
|
125 |
reverse=True
|
126 |
)
|
127 |
+
# Explanation of the main leaderboard
|
128 |
+
explanation = """
|
129 |
+
<p style="font-size: 16px; margin-bottom: 20px;">
|
130 |
+
This leaderboard uses a scoring system that balances win rate and total battles. The score is calculated using the formula:
|
131 |
+
<br>
|
132 |
+
<strong>Score = Win Rate * (1 - 1 / (Total Battles + 1))</strong>
|
133 |
+
<br>
|
134 |
+
This formula rewards models with higher win rates and more battles. As the number of battles increases, the score approaches the win rate.
|
135 |
+
</p>
|
136 |
+
"""
|
137 |
|
138 |
+
leaderboard_html = f"""
|
139 |
+
{explanation}
|
140 |
<style>
|
141 |
+
.leaderboard-table {{
|
142 |
width: 100%;
|
143 |
border-collapse: collapse;
|
144 |
font-family: Arial, sans-serif;
|
145 |
+
}}
|
146 |
+
.leaderboard-table th, .leaderboard-table td {{
|
147 |
border: 1px solid #ddd;
|
148 |
padding: 8px;
|
149 |
text-align: left;
|
150 |
+
}}
|
151 |
+
.leaderboard-table th {{
|
152 |
background-color: rgba(255, 255, 255, 0.1);
|
153 |
font-weight: bold;
|
154 |
+
}}
|
155 |
+
.rank-column {{
|
156 |
width: 60px;
|
157 |
text-align: center;
|
158 |
+
}}
|
159 |
+
.opponent-details {{
|
160 |
font-size: 0.9em;
|
161 |
color: #888;
|
162 |
+
}}
|
163 |
</style>
|
164 |
<table class='leaderboard-table'>
|
165 |
<tr>
|
166 |
<th class='rank-column'>Rank</th>
|
167 |
<th>Model</th>
|
168 |
+
<th>Score</th>
|
169 |
<th>Wins</th>
|
170 |
<th>Losses</th>
|
171 |
<th>Win Rate</th>
|
|
|
193 |
<tr>
|
194 |
<td class='rank-column'>{rank_display}</td>
|
195 |
<td>{get_human_readable_name(model)}</td>
|
196 |
+
<td>{results['score']:.4f}</td>
|
197 |
<td>{results['wins']}</td>
|
198 |
<td>{results['losses']}</td>
|
199 |
<td>{win_rate:.2f}%</td>
|
|
|
213 |
min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
|
214 |
max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
|
215 |
|
216 |
+
explanation_elo = f"""
|
217 |
<p style="font-size: 16px; margin-bottom: 20px;">
|
218 |
This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
|
219 |
Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
|
|
|
224 |
"""
|
225 |
|
226 |
leaderboard_html = f"""
|
227 |
+
{explanation_elo}
|
228 |
<style>
|
229 |
.elo-leaderboard-table {{
|
230 |
width: 100%;
|