SLM-RAG-Arena

Running on Zero

App Files Files Community

oliver-aizip committed 29 days ago

Commit

c9a7557

1 Parent(s): d7a58e0

support loading leaderboard/votes from persistent source

Browse files

Files changed (4) hide show

utils/leaderboard.py +14 -4
utils/leaderboard/arena_elo_leaderboard.csv +9 -0
utils/vote_logger.py +12 -2
utils/votes/vote_details.csv +7 -0

utils/leaderboard.py CHANGED Viewed

@@ -3,7 +3,7 @@ import pandas as pd
 import math
 from datetime import datetime
 from .models import models
-from huggingface_hub import CommitScheduler
 # Default K-factor (determines how much a single match affects ratings)
 DEFAULT_K_FACTOR = 32
@@ -12,11 +12,12 @@ DEFAULT_K_FACTOR = 32
 DEFAULT_ELO = 1500
 LEADERBOARD_FN = './utils/leaderboard/arena_elo_leaderboard.csv'
 #csv_path = os.path.join('utils', 'arena_elo_leaderboard.csv')
 leaderboard_scheduler = CommitScheduler(
-    repo_id="aizip-dev/Arena-Metadata",
     folder_path="utils/leaderboard",
     repo_type="dataset",
     every=1
@@ -133,11 +134,19 @@ def load_leaderboard_data():
     try:
         # Define the path to the CSV file for leaderboard
         csv_path = LEADERBOARD_FN
         # Check if the file exists and load it
         if os.path.exists(csv_path):
-            df = pd.read_csv(csv_path)
             # Process the data into our structure
             for _, row in df.iterrows():
                 model = row['model']
@@ -152,6 +161,7 @@ def load_leaderboard_data():
                 results["votes"] += results["wins"][model] + results["losses"][model] + results["ties"][model] // 2
         else:
             # If file doesn't exist, pre-populate with some reasonable data
             from .models import model_names
             for model in model_names:
                 results["wins"][model] = 0

 import math
 from datetime import datetime
 from .models import models
+from huggingface_hub import CommitScheduler, hf_hub_download
 # Default K-factor (determines how much a single match affects ratings)
 DEFAULT_K_FACTOR = 32
 DEFAULT_ELO = 1500
 LEADERBOARD_FN = './utils/leaderboard/arena_elo_leaderboard.csv'
+REPO_ID = "aizip-dev/Arena-Metadata"
 #csv_path = os.path.join('utils', 'arena_elo_leaderboard.csv')
 leaderboard_scheduler = CommitScheduler(
+    repo_id=REPO_ID,
     folder_path="utils/leaderboard",
     repo_type="dataset",
     every=1
     try:
         # Define the path to the CSV file for leaderboard
         csv_path = LEADERBOARD_FN
+        hub_leaderboard_path = hf_hub_download(
+            repo_id=REPO_ID,
+            filename="arena_elo_leaderboard.csv",
+            repo_type="dataset",
+        )
+        df = pd.read_csv(hub_leaderboard_path)
+        print(f"Successfully loaded leaderboard from the Hub. {len(df)} models.")
+        df.to_csv(LEADERBOARD_FN, index=False)
+        print(f"Leaderboard copied to {LEADERBOARD_FN} for CommitScheduler.")
         # Check if the file exists and load it
         if os.path.exists(csv_path):
             # Process the data into our structure
             for _, row in df.iterrows():
                 model = row['model']
                 results["votes"] += results["wins"][model] + results["losses"][model] + results["ties"][model] // 2
         else:
             # If file doesn't exist, pre-populate with some reasonable data
+            print("Leaderboard file not found. Initializing with default values.")
             from .models import model_names
             for model in model_names:
                 results["wins"][model] = 0

utils/leaderboard/arena_elo_leaderboard.csv ADDED Viewed

	@@ -0,0 +1,9 @@

+model,elo,wins,losses,ties,games_played,confidence_interval
+Llama-3.2-1b-Instruct,1516.0,1,0,0,1,784.0
+Qwen2.5-1.5b-Instruct,1500.0,0,0,0,0,inf
+Qwen2.5-3b-Instruct,1500.0,0,0,0,1,784.0
+Llama-3.2-3b-Instruct,1500.0,0,0,0,1,784.0
+Gemma-3-1b-it,1500.0,0,0,0,0,inf
+Gemma-2-2b-it,1500.0,0,0,0,1,784.0
+IBM Granite-3.3-2b-instruct,1500.0,0,0,0,0,inf
+Phi-4-mini-instruct,1484.0,0,1,0,2,554.4

utils/vote_logger.py CHANGED Viewed

@@ -3,7 +3,7 @@ import csv
 import json
 from datetime import datetime
 import pandas as pd
-from huggingface_hub import CommitScheduler
 FOLDER_PATH = "./utils/votes"
@@ -11,9 +11,19 @@ vote_details_scheduler = CommitScheduler(
     repo_id="aizip-dev/Arena-Metadata",
     folder_path="utils/votes",
     repo_type="dataset",
-    every=1
     )
 def save_vote_details(example, model_a, model_b, winner, feedback, summary_a, summary_b):
     """
     Save detailed vote information to CSV file for future analysis.

 import json
 from datetime import datetime
 import pandas as pd
+from huggingface_hub import CommitScheduler, hf_hub_download
 FOLDER_PATH = "./utils/votes"
     repo_id="aizip-dev/Arena-Metadata",
     folder_path="utils/votes",
     repo_type="dataset",
+    every=1,
     )
+hub_votes = hf_hub_download(
+    repo_id="aizip-dev/Arena-Metadata",
+    filename="vote_details.csv",
+    repo_type="dataset",
+    revision="main",)
+df = pd.read_csv(hub_votes)
+print(f"Successfully loaded leaderboard from the Hub. {len(df)} models.")
+df.to_csv(os.path.join(FOLDER_PATH, "vote_details.csv"), index=False)
+print(f"Votes copied to {FOLDER_PATH} for CommitScheduler.")
 def save_vote_details(example, model_a, model_b, winner, feedback, summary_a, summary_b):
     """
     Save detailed vote information to CSV file for future analysis.

utils/votes/vote_details.csv ADDED Viewed

	@@ -0,0 +1,7 @@

+timestamp,model_a,model_b,winner,feedback,question_id,question,insufficient_context,summary_a,summary_b
+2025-05-08 14:10:08,Phi-4-mini-instruct,Qwen2.5-3b-Instruct,neither,"[""Model A: Incorrect refusal (if applicable)"", ""Model B: Incorrect refusal (if applicable)""]",unknown,Who was Samuel Worcester and why was he arrested?,True,"Samuel Worcester was a missionary and a member of the Church of Jesus Christ of Latter-day Saints (LDS Church) who was involved in the Indian Territory (present-day Oklahoma) in the mid-19th century. He was arrested due to his involvement in the Cherokee Nation's efforts to establish a separate state, which was in conflict with the United States government's policies at the time. His arrest was related to his role in the Cherokee Nation's resistance against the Indian Removal Act and his support for the Cherokee's legal efforts to maintain their sovereignty, including the use of the U.S. legal system to fight against the removal of their people. Samuel Worcester was eventually convicted and sentenced to imprisonment, which led to his death in custody.",Samuel Worcester was an American legal activist and missionary who opposed the forced relocation of Native Americans from their ancestral lands west of the Mississippi River during the 1830s. He was arrested along with other individuals for refusing to leave Indian Territory as part of the Indian Removal Act.
+2025-05-08 14:14:43,Llama-3.2-1b-Instruct,Phi-4-mini-instruct,left,"[""Model A: More complete""]",unknown,What is the recommended minimum hours of sleep per night to avoid sleep deprivation?,False,"The query is answerable from the provided context. To determine the recommended minimum hours of sleep per night to avoid sleep deprivation, one would typically consult sleep hygiene guidelines or studies that focus on sleep duration and its effects on health.
+The National Sleep Foundation recommends that adults aim for 7-9 hours of sleep per night for optimal health and functioning. However, individual sleep needs can vary based on factors such as age, lifestyle, and personal preferences.","The recommended minimum hours of sleep per night to avoid sleep deprivation is typically 7 to 9 hours. However, individual needs can vary, and it's important to listen to your body and adjust accordingly. If you have specific concerns or conditions, consulting a healthcare professional is advised."
+2025-05-08 14:15:07,Gemma-2-2b-it,Llama-3.2-3b-Instruct,neither,"[""Model B: Incorrect refusal (if applicable)"", ""Model A: Incorrect refusal (if applicable)""]",unknown,What's the maritime equivalent of yelling 'surprise!' when rounding a blind corner?,False,"Context does not contain information about maritime equivalents of actions.
+","I do not have enough information to answer this query. The context provided does not include any relevant information about maritime terminology, rounding a blind corner, or the concept of ""yelling'surprise!'"" in this context."