k-mktr committed on
Commit
e19f726
•
1 Parent(s): 1bf9780

Improved the score logic

Browse files

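We calculate a score for each model using the formula: `score = win_rate * (1 - 1 / (total_battles + 1))`. The second factor equals `total_battles / (total_battles + 1)`, so it discounts the win rate of models with few battles and approaches 1 as the number of battles grows. This balances win rate with the number of battles, giving more weight to models that have participated in more battles.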

Files changed (1) hide show
  1. app.py +18 -3
app.py CHANGED
@@ -98,10 +98,8 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
98
 
99
  result_message = f"""
100
  🎉 Vote recorded! You're awesome! 🌟
101
-
102
  🔵 In the left corner: {get_human_readable_name(left_model)}
103
  🔴 In the right corner: {get_human_readable_name(right_model)}
104
-
105
  πŸ† And the champion you picked is... {get_human_readable_name(winner)}! πŸ₯‡
106
  """
107
 
@@ -116,11 +114,25 @@ def record_vote(prompt, left_response, right_response, left_model, right_model,
116
 
117
  def get_leaderboard():
118
  battle_results = get_current_leaderboard()
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  sorted_results = sorted(
120
  battle_results.items(),
121
- key=lambda x: (x[1]["wins"], -x[1]["losses"]),
122
  reverse=True
123
  )
 
124
  leaderboard = """
125
  <style>
126
  .leaderboard-table {
@@ -150,6 +162,7 @@ def get_leaderboard():
150
  <tr>
151
  <th class='rank-column'>Rank</th>
152
  <th>Model</th>
 
153
  <th>Wins</th>
154
  <th>Losses</th>
155
  <th>Win Rate</th>
@@ -158,6 +171,7 @@ def get_leaderboard():
158
  <th>Toughest Opponent</th>
159
  </tr>
160
  """
 
161
  for index, (model, results) in enumerate(sorted_results, start=1):
162
  total_battles = results["wins"] + results["losses"]
163
  win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0
@@ -185,6 +199,7 @@ def get_leaderboard():
185
  <tr>
186
  <td class='rank-column'>{rank_display}</td>
187
  <td>{get_human_readable_name(model)}</td>
 
188
  <td>{results['wins']}</td>
189
  <td>{results['losses']}</td>
190
  <td>{win_rate:.2f}%</td>
 
98
 
99
  result_message = f"""
100
  🎉 Vote recorded! You're awesome! 🌟
 
101
  🔵 In the left corner: {get_human_readable_name(left_model)}
102
  🔴 In the right corner: {get_human_readable_name(right_model)}
 
103
  πŸ† And the champion you picked is... {get_human_readable_name(winner)}! πŸ₯‡
104
  """
105
 
 
114
 
115
  def get_leaderboard():
116
  battle_results = get_current_leaderboard()
117
+
118
+ # Calculate scores for each model
119
+ for model, results in battle_results.items():
120
+ total_battles = results["wins"] + results["losses"]
121
+ if total_battles > 0:
122
+ win_rate = results["wins"] / total_battles
123
+ # Score formula: win_rate * (1 - 1 / (total_battles + 1))
124
+ # This gives more weight to models with more battles
125
+ results["score"] = win_rate * (1 - 1 / (total_battles + 1))
126
+ else:
127
+ results["score"] = 0
128
+
129
+ # Sort results by score, then by total battles
130
  sorted_results = sorted(
131
  battle_results.items(),
132
+ key=lambda x: (x[1]["score"], x[1]["wins"] + x[1]["losses"]),
133
  reverse=True
134
  )
135
+
136
  leaderboard = """
137
  <style>
138
  .leaderboard-table {
 
162
  <tr>
163
  <th class='rank-column'>Rank</th>
164
  <th>Model</th>
165
+ <th>Score</th>
166
  <th>Wins</th>
167
  <th>Losses</th>
168
  <th>Win Rate</th>
 
171
  <th>Toughest Opponent</th>
172
  </tr>
173
  """
174
+
175
  for index, (model, results) in enumerate(sorted_results, start=1):
176
  total_battles = results["wins"] + results["losses"]
177
  win_rate = (results["wins"] / total_battles * 100) if total_battles > 0 else 0
 
199
  <tr>
200
  <td class='rank-column'>{rank_display}</td>
201
  <td>{get_human_readable_name(model)}</td>
202
+ <td>{results['score']:.4f}</td>
203
  <td>{results['wins']}</td>
204
  <td>{results['losses']}</td>
205
  <td>{win_rate:.2f}%</td>