Spaces:
Runtime error
Runtime error
fix elo scoring
Browse files- calculate_elo.py +69 -35
calculate_elo.py
CHANGED
@@ -4,7 +4,8 @@ from datetime import datetime
|
|
4 |
from decimal import Decimal
|
5 |
|
6 |
import boto3
|
7 |
-
from boto3.dynamodb.conditions import Attr
|
|
|
8 |
|
9 |
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
|
10 |
|
@@ -89,6 +90,10 @@ def _create_elo_logs_table():
|
|
89 |
'AttributeName': 'arena_battle_id',
|
90 |
'KeyType': 'HASH' # Partition key
|
91 |
},
|
|
|
|
|
|
|
|
|
92 |
],
|
93 |
AttributeDefinitions=[
|
94 |
{
|
@@ -99,6 +104,10 @@ def _create_elo_logs_table():
|
|
99 |
'AttributeName': 'battle_timestamp',
|
100 |
'AttributeType': 'S'
|
101 |
},
|
|
|
|
|
|
|
|
|
102 |
],
|
103 |
ProvisionedThroughput={
|
104 |
'ReadCapacityUnits': 10,
|
@@ -106,12 +115,16 @@ def _create_elo_logs_table():
|
|
106 |
},
|
107 |
GlobalSecondaryIndexes=[
|
108 |
{
|
109 |
-
'IndexName': '
|
110 |
'KeySchema': [
|
111 |
{
|
112 |
-
'AttributeName': '
|
113 |
'KeyType': 'HASH' # Partition key for the GSI
|
114 |
},
|
|
|
|
|
|
|
|
|
115 |
],
|
116 |
'Projection': {
|
117 |
'ProjectionType': 'ALL'
|
@@ -157,36 +170,15 @@ def calculate_elo(rating1, rating2, result, K=32):
|
|
157 |
def get_last_processed_timestamp():
|
158 |
table = dynamodb.Table('elo_logs')
|
159 |
|
160 |
-
response = table.update (
|
161 |
-
AttributeDefinitions=[
|
162 |
-
{
|
163 |
-
'AttributeName': 'timestamp',
|
164 |
-
'AttributeType': 'S'
|
165 |
-
},
|
166 |
-
],
|
167 |
-
GlobalSecondaryIndexUpdates=[
|
168 |
-
{
|
169 |
-
'Create': {
|
170 |
-
'IndexName': 'TimestampIndex',
|
171 |
-
'KeySchema': [
|
172 |
-
{
|
173 |
-
'AttributeName': 'timestamp',
|
174 |
-
'KeyType': 'RANGE'
|
175 |
-
},
|
176 |
-
],
|
177 |
-
'Projection': {
|
178 |
-
'ProjectionType': 'ALL',
|
179 |
-
}
|
180 |
-
},
|
181 |
-
},
|
182 |
-
]
|
183 |
-
)
|
184 |
-
|
185 |
# Scan the table sorted by timestamp in descending order
|
186 |
-
response = table.
|
187 |
-
|
188 |
-
|
|
|
|
|
189 |
)
|
|
|
|
|
190 |
|
191 |
# If there are no items in the table, return a default timestamp
|
192 |
if not response['Items']:
|
@@ -207,7 +199,8 @@ def log_elo_update(arena_battle_id, battle_timestamp, new_rating1, new_rating2):
|
|
207 |
'battle_timestamp': battle_timestamp, # Use the timestamp of the battle
|
208 |
'log_timestamp': datetime.now().isoformat(), # Also store the timestamp of the log for completeness
|
209 |
'new_rating1': new_rating1,
|
210 |
-
'new_rating2': new_rating2
|
|
|
211 |
}
|
212 |
)
|
213 |
|
@@ -238,9 +231,41 @@ def update_elo_score(chatbot_name, new_elo_score):
|
|
238 |
)
|
239 |
|
240 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
241 |
def main():
|
242 |
-
#
|
243 |
-
|
|
|
|
|
244 |
battles = get_unprocessed_battles(last_processed_timestamp)
|
245 |
|
246 |
elo_scores = {}
|
@@ -262,15 +287,24 @@ def main():
|
|
262 |
elo_result = 0
|
263 |
|
264 |
new_rating1, new_rating2 = calculate_elo(elo_scores[battle['choice1_name']], elo_scores[battle['choice2_name']], elo_result)
|
|
|
265 |
elo_scores[battle['choice1_name']] = new_rating1
|
266 |
elo_scores[battle['choice2_name']] = new_rating2
|
267 |
log_elo_update(battle['arena_battle_id'], battle['timestamp'], new_rating1, new_rating2)
|
268 |
-
logging.info(f"{battle['choice1_name']}: {elo_scores[battle['choice1_name']]} -> {new_rating1} | {battle['choice2_name']}: {elo_scores[battle['choice2_name']]} -> {new_rating2}")
|
269 |
update_elo_score(battle['choice1_name'], new_rating1)
|
270 |
update_elo_score(battle['choice2_name'], new_rating2)
|
271 |
elo_scores[battle['choice1_name']] = new_rating1
|
272 |
elo_scores[battle['choice2_name']] = new_rating2
|
273 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
274 |
|
275 |
if __name__ == "__main__":
|
276 |
main()
|
|
|
4 |
from decimal import Decimal
|
5 |
|
6 |
import boto3
|
7 |
+
from boto3.dynamodb.conditions import Attr, Key
|
8 |
+
from datasets import Dataset
|
9 |
|
10 |
logging.basicConfig(level=os.getenv("LOG_LEVEL", "INFO"))
|
11 |
|
|
|
90 |
'AttributeName': 'arena_battle_id',
|
91 |
'KeyType': 'HASH' # Partition key
|
92 |
},
|
93 |
+
{
|
94 |
+
'AttributeName': 'battle_timestamp',
|
95 |
+
'KeyType': 'RANGE' # Sort key
|
96 |
+
},
|
97 |
],
|
98 |
AttributeDefinitions=[
|
99 |
{
|
|
|
104 |
'AttributeName': 'battle_timestamp',
|
105 |
'AttributeType': 'S'
|
106 |
},
|
107 |
+
{
|
108 |
+
'AttributeName': 'all',
|
109 |
+
'AttributeType': 'S'
|
110 |
+
}
|
111 |
],
|
112 |
ProvisionedThroughput={
|
113 |
'ReadCapacityUnits': 10,
|
|
|
115 |
},
|
116 |
GlobalSecondaryIndexes=[
|
117 |
{
|
118 |
+
'IndexName': 'AllTimestampIndex',
|
119 |
'KeySchema': [
|
120 |
{
|
121 |
+
'AttributeName': 'all',
|
122 |
'KeyType': 'HASH' # Partition key for the GSI
|
123 |
},
|
124 |
+
{
|
125 |
+
'AttributeName': 'battle_timestamp',
|
126 |
+
'KeyType': 'RANGE' # Sort key for the GSI
|
127 |
+
}
|
128 |
],
|
129 |
'Projection': {
|
130 |
'ProjectionType': 'ALL'
|
|
|
170 |
def get_last_processed_timestamp():
|
171 |
table = dynamodb.Table('elo_logs')
|
172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
173 |
# Query AllTimestampIndex in descending battle_timestamp order to fetch the most recent log entry
|
174 |
+
response = table.query(
|
175 |
+
IndexName='AllTimestampIndex',
|
176 |
+
KeyConditionExpression=Key('all').eq('ALL'),
|
177 |
+
ScanIndexForward=False,
|
178 |
+
Limit=1
|
179 |
)
|
180 |
+
print(response)
|
181 |
+
# exit(0)
|
182 |
|
183 |
# If there are no items in the table, return a default timestamp
|
184 |
if not response['Items']:
|
|
|
199 |
'battle_timestamp': battle_timestamp, # Use the timestamp of the battle
|
200 |
'log_timestamp': datetime.now().isoformat(), # Also store the timestamp of the log for completeness
|
201 |
'new_rating1': new_rating1,
|
202 |
+
'new_rating2': new_rating2,
|
203 |
+
'all': 'ALL',
|
204 |
}
|
205 |
)
|
206 |
|
|
|
231 |
)
|
232 |
|
233 |
|
234 |
+
def get_elo_scores():
    """Return every item stored in the ``elo_scores`` DynamoDB table.

    A single ``scan`` call only returns one page of results; when more
    items remain, the response carries a ``LastEvaluatedKey`` that must be
    passed back as ``ExclusiveStartKey`` to continue. The previous
    implementation returned the first page only, which would silently drop
    rows once the table grew beyond one page.

    Returns:
        list: The raw item dicts from all pages of the scan, in scan order.
    """
    table = dynamodb.Table('elo_scores')

    response = table.scan()
    # Copy into a fresh list so later pages can be appended.
    data = list(response['Items'])

    # Keep scanning until DynamoDB stops handing back a continuation key.
    while 'LastEvaluatedKey' in response:
        response = table.scan(ExclusiveStartKey=response['LastEvaluatedKey'])
        data.extend(response['Items'])

    return data
|
241 |
+
|
242 |
+
|
243 |
+
def _backfill_logs():
    """Set ``all = 'ALL'`` on every existing item in the ``elo_logs`` table.

    One-off maintenance helper: ``all`` is the partition key of the
    ``AllTimestampIndex`` GSI, so items written before that attribute
    existed need it added to appear in the index.

    The scan is paginated via ``LastEvaluatedKey`` / ``ExclusiveStartKey``;
    the previous implementation only processed the first page, leaving any
    remaining items without the attribute.
    """
    table = dynamodb.Table('elo_logs')

    scan_kwargs = {}
    while True:
        response = table.scan(**scan_kwargs)

        for item in response['Items']:
            table.update_item(
                # elo_logs uses a composite key: partition + sort key.
                Key={
                    'arena_battle_id': item['arena_battle_id'],
                    'battle_timestamp': item['battle_timestamp'],
                },
                # The attribute name is aliased through
                # ExpressionAttributeNames because ALL is a DynamoDB
                # reserved word and cannot appear bare in an expression.
                UpdateExpression="SET #all = :value",
                ExpressionAttributeNames={
                    '#all': 'all',
                },
                ExpressionAttributeValues={
                    ':value': 'ALL',
                },
            )

        last_key = response.get('LastEvaluatedKey')
        if not last_key:
            # No continuation key means the whole table has been visited.
            break
        scan_kwargs['ExclusiveStartKey'] = last_key
|
263 |
+
|
264 |
def main():
|
265 |
+
# _backfill_logs()
|
266 |
+
# _create_elo_logs_table()
|
267 |
+
last_processed_timestamp = get_last_processed_timestamp()
|
268 |
+
# last_processed_timestamp = '1970-01-01T00:00:00'
|
269 |
battles = get_unprocessed_battles(last_processed_timestamp)
|
270 |
|
271 |
elo_scores = {}
|
|
|
287 |
elo_result = 0
|
288 |
|
289 |
new_rating1, new_rating2 = calculate_elo(elo_scores[battle['choice1_name']], elo_scores[battle['choice2_name']], elo_result)
|
290 |
+
logging.info(f"{battle['choice1_name']}: {elo_scores[battle['choice1_name']]} -> {new_rating1} | {battle['choice2_name']}: {elo_scores[battle['choice2_name']]} -> {new_rating2}")
|
291 |
elo_scores[battle['choice1_name']] = new_rating1
|
292 |
elo_scores[battle['choice2_name']] = new_rating2
|
293 |
log_elo_update(battle['arena_battle_id'], battle['timestamp'], new_rating1, new_rating2)
|
|
|
294 |
update_elo_score(battle['choice1_name'], new_rating1)
|
295 |
update_elo_score(battle['choice2_name'], new_rating2)
|
296 |
elo_scores[battle['choice1_name']] = new_rating1
|
297 |
elo_scores[battle['choice2_name']] = new_rating2
|
298 |
|
299 |
+
elo_scores = get_elo_scores()
|
300 |
+
for i, j in enumerate(elo_scores):
|
301 |
+
j["elo_score"] = float(j["elo_score"])
|
302 |
+
elo_scores[i] = j
|
303 |
+
|
304 |
+
# Convert the data into a format suitable for Hugging Face Dataset
|
305 |
+
elo_dataset = Dataset.from_list(elo_scores)
|
306 |
+
elo_dataset.push_to_hub("openaccess-ai-collective/chatbot-arena-elo-scores", private=False)
|
307 |
+
|
308 |
|
309 |
if __name__ == "__main__":
|
310 |
main()
|