Spaces:
Running
on
Zero
Commit
·
c9a7557
1
Parent(s):
d7a58e0
support loading leaderboard/votes from persistent source
Browse files
- utils/leaderboard.py +14 -4
- utils/leaderboard/arena_elo_leaderboard.csv +9 -0
- utils/vote_logger.py +12 -2
- utils/votes/vote_details.csv +7 -0
utils/leaderboard.py
CHANGED
@@ -3,7 +3,7 @@ import pandas as pd
|
|
3 |
import math
|
4 |
from datetime import datetime
|
5 |
from .models import models
|
6 |
-
from huggingface_hub import CommitScheduler
|
7 |
|
8 |
# Default K-factor (determines how much a single match affects ratings)
|
9 |
DEFAULT_K_FACTOR = 32
|
@@ -12,11 +12,12 @@ DEFAULT_K_FACTOR = 32
|
|
12 |
DEFAULT_ELO = 1500
|
13 |
|
14 |
LEADERBOARD_FN = './utils/leaderboard/arena_elo_leaderboard.csv'
|
|
|
15 |
|
16 |
#csv_path = os.path.join('utils', 'arena_elo_leaderboard.csv')
|
17 |
|
18 |
leaderboard_scheduler = CommitScheduler(
|
19 |
-
repo_id=
|
20 |
folder_path="utils/leaderboard",
|
21 |
repo_type="dataset",
|
22 |
every=1
|
@@ -133,11 +134,19 @@ def load_leaderboard_data():
|
|
133 |
try:
|
134 |
# Define the path to the CSV file for leaderboard
|
135 |
csv_path = LEADERBOARD_FN
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
136 |
|
137 |
# Check if the file exists and load it
|
138 |
if os.path.exists(csv_path):
|
139 |
-
|
140 |
-
|
141 |
# Process the data into our structure
|
142 |
for _, row in df.iterrows():
|
143 |
model = row['model']
|
@@ -152,6 +161,7 @@ def load_leaderboard_data():
|
|
152 |
results["votes"] += results["wins"][model] + results["losses"][model] + results["ties"][model] // 2
|
153 |
else:
|
154 |
# If file doesn't exist, pre-populate with some reasonable data
|
|
|
155 |
from .models import model_names
|
156 |
for model in model_names:
|
157 |
results["wins"][model] = 0
|
|
|
3 |
import math
|
4 |
from datetime import datetime
|
5 |
from .models import models
|
6 |
+
from huggingface_hub import CommitScheduler, hf_hub_download
|
7 |
|
8 |
# Default K-factor (determines how much a single match affects ratings)
|
9 |
DEFAULT_K_FACTOR = 32
|
|
|
12 |
DEFAULT_ELO = 1500
|
13 |
|
14 |
LEADERBOARD_FN = './utils/leaderboard/arena_elo_leaderboard.csv'
|
15 |
+
REPO_ID = "aizip-dev/Arena-Metadata"
|
16 |
|
17 |
#csv_path = os.path.join('utils', 'arena_elo_leaderboard.csv')
|
18 |
|
19 |
leaderboard_scheduler = CommitScheduler(
|
20 |
+
repo_id=REPO_ID,
|
21 |
folder_path="utils/leaderboard",
|
22 |
repo_type="dataset",
|
23 |
every=1
|
|
|
134 |
try:
|
135 |
# Define the path to the CSV file for leaderboard
|
136 |
csv_path = LEADERBOARD_FN
|
137 |
+
hub_leaderboard_path = hf_hub_download(
|
138 |
+
repo_id=REPO_ID,
|
139 |
+
filename="arena_elo_leaderboard.csv",
|
140 |
+
repo_type="dataset",
|
141 |
+
)
|
142 |
+
df = pd.read_csv(hub_leaderboard_path)
|
143 |
+
print(f"Successfully loaded leaderboard from the Hub. {len(df)} models.")
|
144 |
+
df.to_csv(LEADERBOARD_FN, index=False)
|
145 |
+
print(f"Leaderboard copied to {LEADERBOARD_FN} for CommitScheduler.")
|
146 |
|
147 |
# Check if the file exists and load it
|
148 |
if os.path.exists(csv_path):
|
149 |
+
|
|
|
150 |
# Process the data into our structure
|
151 |
for _, row in df.iterrows():
|
152 |
model = row['model']
|
|
|
161 |
results["votes"] += results["wins"][model] + results["losses"][model] + results["ties"][model] // 2
|
162 |
else:
|
163 |
# If file doesn't exist, pre-populate with some reasonable data
|
164 |
+
print("Leaderboard file not found. Initializing with default values.")
|
165 |
from .models import model_names
|
166 |
for model in model_names:
|
167 |
results["wins"][model] = 0
|
utils/leaderboard/arena_elo_leaderboard.csv
ADDED
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
model,elo,wins,losses,ties,games_played,confidence_interval
|
2 |
+
Llama-3.2-1b-Instruct,1516.0,1,0,0,1,784.0
|
3 |
+
Qwen2.5-1.5b-Instruct,1500.0,0,0,0,0,inf
|
4 |
+
Qwen2.5-3b-Instruct,1500.0,0,0,0,1,784.0
|
5 |
+
Llama-3.2-3b-Instruct,1500.0,0,0,0,1,784.0
|
6 |
+
Gemma-3-1b-it,1500.0,0,0,0,0,inf
|
7 |
+
Gemma-2-2b-it,1500.0,0,0,0,1,784.0
|
8 |
+
IBM Granite-3.3-2b-instruct,1500.0,0,0,0,0,inf
|
9 |
+
Phi-4-mini-instruct,1484.0,0,1,0,2,554.4
|
utils/vote_logger.py
CHANGED
@@ -3,7 +3,7 @@ import csv
|
|
3 |
import json
|
4 |
from datetime import datetime
|
5 |
import pandas as pd
|
6 |
-
from huggingface_hub import CommitScheduler
|
7 |
|
8 |
FOLDER_PATH = "./utils/votes"
|
9 |
|
@@ -11,9 +11,19 @@ vote_details_scheduler = CommitScheduler(
|
|
11 |
repo_id="aizip-dev/Arena-Metadata",
|
12 |
folder_path="utils/votes",
|
13 |
repo_type="dataset",
|
14 |
-
every=1
|
15 |
)
|
16 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
17 |
def save_vote_details(example, model_a, model_b, winner, feedback, summary_a, summary_b):
|
18 |
"""
|
19 |
Save detailed vote information to CSV file for future analysis.
|
|
|
3 |
import json
|
4 |
from datetime import datetime
|
5 |
import pandas as pd
|
6 |
+
from huggingface_hub import CommitScheduler, hf_hub_download
|
7 |
|
8 |
FOLDER_PATH = "./utils/votes"
|
9 |
|
|
|
11 |
repo_id="aizip-dev/Arena-Metadata",
|
12 |
folder_path="utils/votes",
|
13 |
repo_type="dataset",
|
14 |
+
every=1,
|
15 |
)
|
16 |
|
17 |
+
hub_votes = hf_hub_download(
|
18 |
+
repo_id="aizip-dev/Arena-Metadata",
|
19 |
+
filename="vote_details.csv",
|
20 |
+
repo_type="dataset",
|
21 |
+
revision="main",)
|
22 |
+
df = pd.read_csv(hub_votes)
|
23 |
+
print(f"Successfully loaded leaderboard from the Hub. {len(df)} models.")
|
24 |
+
df.to_csv(os.path.join(FOLDER_PATH, "vote_details.csv"), index=False)
|
25 |
+
print(f"Votes copied to {FOLDER_PATH} for CommitScheduler.")
|
26 |
+
|
27 |
def save_vote_details(example, model_a, model_b, winner, feedback, summary_a, summary_b):
|
28 |
"""
|
29 |
Save detailed vote information to CSV file for future analysis.
|
utils/votes/vote_details.csv
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
timestamp,model_a,model_b,winner,feedback,question_id,question,insufficient_context,summary_a,summary_b
|
2 |
+
2025-05-08 14:10:08,Phi-4-mini-instruct,Qwen2.5-3b-Instruct,neither,"[""Model A: Incorrect refusal (if applicable)"", ""Model B: Incorrect refusal (if applicable)""]",unknown,Who was Samuel Worcester and why was he arrested?,True,"Samuel Worcester was a missionary and a member of the Church of Jesus Christ of Latter-day Saints (LDS Church) who was involved in the Indian Territory (present-day Oklahoma) in the mid-19th century. He was arrested due to his involvement in the Cherokee Nation's efforts to establish a separate state, which was in conflict with the United States government's policies at the time. His arrest was related to his role in the Cherokee Nation's resistance against the Indian Removal Act and his support for the Cherokee's legal efforts to maintain their sovereignty, including the use of the U.S. legal system to fight against the removal of their people. Samuel Worcester was eventually convicted and sentenced to imprisonment, which led to his death in custody.",Samuel Worcester was an American legal activist and missionary who opposed the forced relocation of Native Americans from their ancestral lands west of the Mississippi River during the 1830s. He was arrested along with other individuals for refusing to leave Indian Territory as part of the Indian Removal Act.
|
3 |
+
2025-05-08 14:14:43,Llama-3.2-1b-Instruct,Phi-4-mini-instruct,left,"[""Model A: More complete""]",unknown,What is the recommended minimum hours of sleep per night to avoid sleep deprivation?,False,"The query is answerable from the provided context. To determine the recommended minimum hours of sleep per night to avoid sleep deprivation, one would typically consult sleep hygiene guidelines or studies that focus on sleep duration and its effects on health.
|
4 |
+
|
5 |
+
The National Sleep Foundation recommends that adults aim for 7-9 hours of sleep per night for optimal health and functioning. However, individual sleep needs can vary based on factors such as age, lifestyle, and personal preferences.","The recommended minimum hours of sleep per night to avoid sleep deprivation is typically 7 to 9 hours. However, individual needs can vary, and it's important to listen to your body and adjust accordingly. If you have specific concerns or conditions, consulting a healthcare professional is advised."
|
6 |
+
2025-05-08 14:15:07,Gemma-2-2b-it,Llama-3.2-3b-Instruct,neither,"[""Model B: Incorrect refusal (if applicable)"", ""Model A: Incorrect refusal (if applicable)""]",unknown,What's the maritime equivalent of yelling 'surprise!' when rounding a blind corner?,False,"Context does not contain information about maritime equivalents of actions.
|
7 |
+
","I do not have enough information to answer this query. The context provided does not include any relevant information about maritime terminology, rounding a blind corner, or the concept of ""yelling'surprise!'"" in this context."
|