oliver-aizip committed on
Commit
c9a7557
·
1 Parent(s): d7a58e0

support loading leaderboard/votes from persistent source

Browse files
utils/leaderboard.py CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
3
  import math
4
  from datetime import datetime
5
  from .models import models
6
- from huggingface_hub import CommitScheduler
7
 
8
  # Default K-factor (determines how much a single match affects ratings)
9
  DEFAULT_K_FACTOR = 32
@@ -12,11 +12,12 @@ DEFAULT_K_FACTOR = 32
12
  DEFAULT_ELO = 1500
13
 
14
  LEADERBOARD_FN = './utils/leaderboard/arena_elo_leaderboard.csv'
 
15
 
16
  #csv_path = os.path.join('utils', 'arena_elo_leaderboard.csv')
17
 
18
  leaderboard_scheduler = CommitScheduler(
19
- repo_id="aizip-dev/Arena-Metadata",
20
  folder_path="utils/leaderboard",
21
  repo_type="dataset",
22
  every=1
@@ -133,11 +134,19 @@ def load_leaderboard_data():
133
  try:
134
  # Define the path to the CSV file for leaderboard
135
  csv_path = LEADERBOARD_FN
 
 
 
 
 
 
 
 
 
136
 
137
  # Check if the file exists and load it
138
  if os.path.exists(csv_path):
139
- df = pd.read_csv(csv_path)
140
-
141
  # Process the data into our structure
142
  for _, row in df.iterrows():
143
  model = row['model']
@@ -152,6 +161,7 @@ def load_leaderboard_data():
152
  results["votes"] += results["wins"][model] + results["losses"][model] + results["ties"][model] // 2
153
  else:
154
  # If file doesn't exist, pre-populate with some reasonable data
 
155
  from .models import model_names
156
  for model in model_names:
157
  results["wins"][model] = 0
 
3
  import math
4
  from datetime import datetime
5
  from .models import models
6
+ from huggingface_hub import CommitScheduler, hf_hub_download
7
 
8
  # Default K-factor (determines how much a single match affects ratings)
9
  DEFAULT_K_FACTOR = 32
 
12
  DEFAULT_ELO = 1500
13
 
14
  LEADERBOARD_FN = './utils/leaderboard/arena_elo_leaderboard.csv'
15
+ REPO_ID = "aizip-dev/Arena-Metadata"
16
 
17
  #csv_path = os.path.join('utils', 'arena_elo_leaderboard.csv')
18
 
19
  leaderboard_scheduler = CommitScheduler(
20
+ repo_id=REPO_ID,
21
  folder_path="utils/leaderboard",
22
  repo_type="dataset",
23
  every=1
 
134
  try:
135
  # Define the path to the CSV file for leaderboard
136
  csv_path = LEADERBOARD_FN
137
+ hub_leaderboard_path = hf_hub_download(
138
+ repo_id=REPO_ID,
139
+ filename="arena_elo_leaderboard.csv",
140
+ repo_type="dataset",
141
+ )
142
+ df = pd.read_csv(hub_leaderboard_path)
143
+ print(f"Successfully loaded leaderboard from the Hub. {len(df)} models.")
144
+ df.to_csv(LEADERBOARD_FN, index=False)
145
+ print(f"Leaderboard copied to {LEADERBOARD_FN} for CommitScheduler.")
146
 
147
  # Check if the file exists and load it
148
  if os.path.exists(csv_path):
149
+
 
150
  # Process the data into our structure
151
  for _, row in df.iterrows():
152
  model = row['model']
 
161
  results["votes"] += results["wins"][model] + results["losses"][model] + results["ties"][model] // 2
162
  else:
163
  # If file doesn't exist, pre-populate with some reasonable data
164
+ print("Leaderboard file not found. Initializing with default values.")
165
  from .models import model_names
166
  for model in model_names:
167
  results["wins"][model] = 0
utils/leaderboard/arena_elo_leaderboard.csv ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ model,elo,wins,losses,ties,games_played,confidence_interval
2
+ Llama-3.2-1b-Instruct,1516.0,1,0,0,1,784.0
3
+ Qwen2.5-1.5b-Instruct,1500.0,0,0,0,0,inf
4
+ Qwen2.5-3b-Instruct,1500.0,0,0,0,1,784.0
5
+ Llama-3.2-3b-Instruct,1500.0,0,0,0,1,784.0
6
+ Gemma-3-1b-it,1500.0,0,0,0,0,inf
7
+ Gemma-2-2b-it,1500.0,0,0,0,1,784.0
8
+ IBM Granite-3.3-2b-instruct,1500.0,0,0,0,0,inf
9
+ Phi-4-mini-instruct,1484.0,0,1,0,2,554.4
utils/vote_logger.py CHANGED
@@ -3,7 +3,7 @@ import csv
3
  import json
4
  from datetime import datetime
5
  import pandas as pd
6
- from huggingface_hub import CommitScheduler
7
 
8
  FOLDER_PATH = "./utils/votes"
9
 
@@ -11,9 +11,19 @@ vote_details_scheduler = CommitScheduler(
11
  repo_id="aizip-dev/Arena-Metadata",
12
  folder_path="utils/votes",
13
  repo_type="dataset",
14
- every=1
15
  )
16
 
 
 
 
 
 
 
 
 
 
 
17
  def save_vote_details(example, model_a, model_b, winner, feedback, summary_a, summary_b):
18
  """
19
  Save detailed vote information to CSV file for future analysis.
 
3
  import json
4
  from datetime import datetime
5
  import pandas as pd
6
+ from huggingface_hub import CommitScheduler, hf_hub_download
7
 
8
  FOLDER_PATH = "./utils/votes"
9
 
 
11
  repo_id="aizip-dev/Arena-Metadata",
12
  folder_path="utils/votes",
13
  repo_type="dataset",
14
+ every=1,
15
  )
16
 
17
+ hub_votes = hf_hub_download(
18
+ repo_id="aizip-dev/Arena-Metadata",
19
+ filename="vote_details.csv",
20
+ repo_type="dataset",
21
+ revision="main",)
22
+ df = pd.read_csv(hub_votes)
23
+ print(f"Successfully loaded leaderboard from the Hub. {len(df)} models.")
24
+ df.to_csv(os.path.join(FOLDER_PATH, "vote_details.csv"), index=False)
25
+ print(f"Votes copied to {FOLDER_PATH} for CommitScheduler.")
26
+
27
  def save_vote_details(example, model_a, model_b, winner, feedback, summary_a, summary_b):
28
  """
29
  Save detailed vote information to CSV file for future analysis.
utils/votes/vote_details.csv ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ timestamp,model_a,model_b,winner,feedback,question_id,question,insufficient_context,summary_a,summary_b
2
+ 2025-05-08 14:10:08,Phi-4-mini-instruct,Qwen2.5-3b-Instruct,neither,"[""Model A: Incorrect refusal (if applicable)"", ""Model B: Incorrect refusal (if applicable)""]",unknown,Who was Samuel Worcester and why was he arrested?,True,"Samuel Worcester was a missionary and a member of the Church of Jesus Christ of Latter-day Saints (LDS Church) who was involved in the Indian Territory (present-day Oklahoma) in the mid-19th century. He was arrested due to his involvement in the Cherokee Nation's efforts to establish a separate state, which was in conflict with the United States government's policies at the time. His arrest was related to his role in the Cherokee Nation's resistance against the Indian Removal Act and his support for the Cherokee's legal efforts to maintain their sovereignty, including the use of the U.S. legal system to fight against the removal of their people. Samuel Worcester was eventually convicted and sentenced to imprisonment, which led to his death in custody.",Samuel Worcester was an American legal activist and missionary who opposed the forced relocation of Native Americans from their ancestral lands west of the Mississippi River during the 1830s. He was arrested along with other individuals for refusing to leave Indian Territory as part of the Indian Removal Act.
3
+ 2025-05-08 14:14:43,Llama-3.2-1b-Instruct,Phi-4-mini-instruct,left,"[""Model A: More complete""]",unknown,What is the recommended minimum hours of sleep per night to avoid sleep deprivation?,False,"The query is answerable from the provided context. To determine the recommended minimum hours of sleep per night to avoid sleep deprivation, one would typically consult sleep hygiene guidelines or studies that focus on sleep duration and its effects on health.
4
+
5
+ The National Sleep Foundation recommends that adults aim for 7-9 hours of sleep per night for optimal health and functioning. However, individual sleep needs can vary based on factors such as age, lifestyle, and personal preferences.","The recommended minimum hours of sleep per night to avoid sleep deprivation is typically 7 to 9 hours. However, individual needs can vary, and it's important to listen to your body and adjust accordingly. If you have specific concerns or conditions, consulting a healthcare professional is advised."
6
+ 2025-05-08 14:15:07,Gemma-2-2b-it,Llama-3.2-3b-Instruct,neither,"[""Model B: Incorrect refusal (if applicable)"", ""Model A: Incorrect refusal (if applicable)""]",unknown,What's the maritime equivalent of yelling 'surprise!' when rounding a blind corner?,False,"Context does not contain information about maritime equivalents of actions.
7
+ ","I do not have enough information to answer this query. The context provided does not include any relevant information about maritime terminology, rounding a blind corner, or the concept of ""yelling'surprise!'"" in this context."