k-mktr committed on
Commit
0efd625
·
verified ·
1 Parent(s): 289638f

Update leaderboard.py

Browse files
Files changed (1) hide show
  1. leaderboard.py +38 -14
leaderboard.py CHANGED
@@ -108,41 +108,64 @@ def get_human_readable_name(model_name: str) -> str:
108
 
109
  def get_leaderboard():
110
  leaderboard = load_leaderboard()
 
 
 
 
 
 
 
 
 
 
 
111
  sorted_results = sorted(
112
  leaderboard.items(),
113
- key=lambda x: (x[1]["wins"] / (x[1]["wins"] + x[1]["losses"]) if x[1]["wins"] + x[1]["losses"] > 0 else 0, x[1]["wins"] + x[1]["losses"]),
114
  reverse=True
115
  )
 
 
 
 
 
 
 
 
 
 
116
 
117
- leaderboard_html = """
 
118
  <style>
119
- .leaderboard-table {
120
  width: 100%;
121
  border-collapse: collapse;
122
  font-family: Arial, sans-serif;
123
- }
124
- .leaderboard-table th, .leaderboard-table td {
125
  border: 1px solid #ddd;
126
  padding: 8px;
127
  text-align: left;
128
- }
129
- .leaderboard-table th {
130
  background-color: rgba(255, 255, 255, 0.1);
131
  font-weight: bold;
132
- }
133
- .rank-column {
134
  width: 60px;
135
  text-align: center;
136
- }
137
- .opponent-details {
138
  font-size: 0.9em;
139
  color: #888;
140
- }
141
  </style>
142
  <table class='leaderboard-table'>
143
  <tr>
144
  <th class='rank-column'>Rank</th>
145
  <th>Model</th>
 
146
  <th>Wins</th>
147
  <th>Losses</th>
148
  <th>Win Rate</th>
@@ -170,6 +193,7 @@ def get_leaderboard():
170
  <tr>
171
  <td class='rank-column'>{rank_display}</td>
172
  <td>{get_human_readable_name(model)}</td>
 
173
  <td>{results['wins']}</td>
174
  <td>{results['losses']}</td>
175
  <td>{win_rate:.2f}%</td>
@@ -189,7 +213,7 @@ def get_elo_leaderboard():
189
  min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
190
  max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
191
 
192
- explanation = f"""
193
  <p style="font-size: 16px; margin-bottom: 20px;">
194
  This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
195
  Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
@@ -200,7 +224,7 @@ def get_elo_leaderboard():
200
  """
201
 
202
  leaderboard_html = f"""
203
- {explanation}
204
  <style>
205
  .elo-leaderboard-table {{
206
  width: 100%;
 
108
 
109
  def get_leaderboard():
110
  leaderboard = load_leaderboard()
111
+
112
+ # Calculate scores for each model
113
+ for model, results in leaderboard.items():
114
+ total_battles = results["wins"] + results["losses"]
115
+ if total_battles > 0:
116
+ win_rate = results["wins"] / total_battles
117
+ results["score"] = win_rate * (1 - 1 / (total_battles + 1))
118
+ else:
119
+ results["score"] = 0
120
+
121
+ # Sort results by score, then by total battles
122
  sorted_results = sorted(
123
  leaderboard.items(),
124
+ key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
125
  reverse=True
126
  )
127
+ # Explanation of the main leaderboard
128
+ explanation = """
129
+ <p style="font-size: 16px; margin-bottom: 20px;">
130
+ This leaderboard uses a scoring system that balances win rate and total battles. The score is calculated using the formula:
131
+ <br>
132
+ <strong>Score = Win Rate * (1 - 1 / (Total Battles + 1))</strong>
133
+ <br>
134
+ This formula rewards models with higher win rates and more battles. As the number of battles increases, the score approaches the win rate.
135
+ </p>
136
+ """
137
 
138
+ leaderboard_html = f"""
139
+ {explanation}
140
  <style>
141
+ .leaderboard-table {{
142
  width: 100%;
143
  border-collapse: collapse;
144
  font-family: Arial, sans-serif;
145
+ }}
146
+ .leaderboard-table th, .leaderboard-table td {{
147
  border: 1px solid #ddd;
148
  padding: 8px;
149
  text-align: left;
150
+ }}
151
+ .leaderboard-table th {{
152
  background-color: rgba(255, 255, 255, 0.1);
153
  font-weight: bold;
154
+ }}
155
+ .rank-column {{
156
  width: 60px;
157
  text-align: center;
158
+ }}
159
+ .opponent-details {{
160
  font-size: 0.9em;
161
  color: #888;
162
+ }}
163
  </style>
164
  <table class='leaderboard-table'>
165
  <tr>
166
  <th class='rank-column'>Rank</th>
167
  <th>Model</th>
168
+ <th>Score</th>
169
  <th>Wins</th>
170
  <th>Losses</th>
171
  <th>Win Rate</th>
 
193
  <tr>
194
  <td class='rank-column'>{rank_display}</td>
195
  <td>{get_human_readable_name(model)}</td>
196
+ <td>{results['score']:.4f}</td>
197
  <td>{results['wins']}</td>
198
  <td>{results['losses']}</td>
199
  <td>{win_rate:.2f}%</td>
 
213
  min_initial_rating = min(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
214
  max_initial_rating = max(1000 + (get_model_size(model) * 100) for model, _ in arena_config.APPROVED_MODELS)
215
 
216
+ explanation_elo = f"""
217
  <p style="font-size: 16px; margin-bottom: 20px;">
218
  This leaderboard uses a modified ELO rating system that takes into account both the performance and size of the models.
219
  Initial ratings range from {round(min_initial_rating)} to {round(max_initial_rating)} points, based on model size, with larger models starting at higher ratings.
 
224
  """
225
 
226
  leaderboard_html = f"""
227
+ {explanation_elo}
228
  <style>
229
  .elo-leaderboard-table {{
230
  width: 100%;