Spaces:
Running
on
Zero
Running
on
Zero
File size: 4,503 Bytes
a24f6cf c9a7557 15dd199 d7a58e0 15dd199 d7a58e0 15dd199 c9a7557 15dd199 a24f6cf c9a7557 491e00d 5e2794d c9a7557 5e2794d c9a7557 a24f6cf d7a58e0 a24f6cf 15dd199 491e00d a24f6cf 15dd199 a24f6cf 702ad2e a24f6cf d7a58e0 a24f6cf 702ad2e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
import os
import csv
import json
from datetime import datetime
import pandas as pd
from huggingface_hub import CommitScheduler, hf_hub_download
# Local folder watched by the CommitScheduler; vote_details.csv lives here.
FOLDER_PATH = "./utils/votes"

# Background scheduler that periodically commits the contents of FOLDER_PATH
# to the "aizip-dev/Arena-Metadata" dataset repository. `every` is the commit
# interval (in minutes, per the huggingface_hub documentation).
vote_details_scheduler = CommitScheduler(
    repo_id="aizip-dev/Arena-Metadata",
    folder_path=FOLDER_PATH,
    repo_type="dataset",
    every=1,
)

# Seed the local folder with the votes already stored on the Hub so that new
# votes are appended to the existing history.
# NOTE(review): a failed download aborts the import. That looks intentional,
# since starting from an empty local file would presumably let the scheduler
# replace the Hub copy with a shorter one -- confirm before adding a fallback.
hub_votes = hf_hub_download(
    repo_id="aizip-dev/Arena-Metadata",
    filename="vote_details.csv",
    repo_type="dataset",
    revision="main",
)
df = pd.read_csv(hub_votes)
print(f"Successfully loaded leaderboard from the Hub. {len(df)} votes.")

# The scheduler thread is already running at this point, so write the seed
# file while holding its lock; otherwise a commit could snapshot a partially
# written CSV.
os.makedirs(FOLDER_PATH, exist_ok=True)
with vote_details_scheduler.lock:
    df.to_csv(os.path.join(FOLDER_PATH, "vote_details.csv"), index=False)
print(f"Votes copied to {FOLDER_PATH} for CommitScheduler.")
def save_vote_details(example, model_a, model_b, winner, feedback, summary_a, summary_b):
    """
    Save detailed vote information to CSV file for future analysis.

    The record is appended to ``vote_details.csv`` inside ``FOLDER_PATH``,
    the folder watched by ``vote_details_scheduler``. Errors are reported on
    stdout and never raised to the caller.

    Parameters:
    - example: The question and context information (a mapping; the keys
      "id", "question" and "insufficient" are read, each with a default)
    - model_a, model_b: Names of models being compared
    - winner: 'left', 'right', 'tie', or 'neither' indicating the vote result
    - feedback: List of feedback options selected by the user (stored as a
      JSON string)
    - summary_a, summary_b: The model outputs (summaries)
    """
    # Prepare the vote details record
    vote_record = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "model_a": model_a,
        "model_b": model_b,
        "winner": winner,
        "feedback": json.dumps(feedback),
        "question_id": example.get("id", "unknown"),
        "question": example.get("question", ""),
        "insufficient_context": example.get("insufficient", False),
        "summary_a": summary_a,
        "summary_b": summary_b,
    }

    # Define the path to the CSV file
    csv_path = os.path.join(FOLDER_PATH, 'vote_details.csv')

    try:
        # Hold the scheduler's lock while writing so a background commit never
        # snapshots a half-written row. Do NOT use `with vote_details_scheduler:`
        # here: leaving the scheduler's own context manager triggers a final
        # push and then stops the scheduler, so later votes would never be
        # uploaded.
        with vote_details_scheduler.lock:
            # Decide about the header inside the critical section; an existing
            # but empty file still needs one.
            needs_header = (
                not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0
            )
            # Open the file in append mode
            with open(csv_path, 'a', newline='', encoding='utf-8') as f:
                writer = csv.DictWriter(f, fieldnames=list(vote_record))
                if needs_header:
                    writer.writeheader()
                # Write the vote record
                writer.writerow(vote_record)
        print(f"Vote details saved to {csv_path}")
    except Exception as e:
        # Best effort: a failed write must not break the voting flow.
        print(f"Error saving vote details: {e}")
def get_vote_statistics():
    """
    Analyze vote details and provide statistics.

    Reads ``vote_details.csv`` from ``FOLDER_PATH`` and aggregates it.

    Returns:
    - Dictionary of statistics about votes with the keys "total_votes",
      "winner_distribution", "model_appearances", "model_wins" and
      "feedback_frequency" (all counts are plain ints), or a dictionary
      with a single "error" key when the file is missing or cannot be
      analyzed.
    """
    csv_path = os.path.join(FOLDER_PATH, 'vote_details.csv')
    if not os.path.exists(csv_path):
        return {"error": "No vote data available"}

    try:
        # Read the CSV into a DataFrame
        df = pd.read_csv(csv_path)
        winners = df['winner']
        model_a = df['model_a']
        model_b = df['model_b']

        # Basic statistics
        stats = {
            "total_votes": len(df),
            "winner_distribution": {
                outcome: int((winners == outcome).sum())
                for outcome in ("left", "right", "tie", "neither")
            },
            "model_appearances": {},
            "model_wins": {},
            "feedback_frequency": {},
        }

        # Count model appearances and wins. Rows with a missing model name
        # are ignored so no NaN key ends up in the result.
        left_won = winners == 'left'
        right_won = winners == 'right'
        for model in pd.concat([model_a, model_b]).dropna().unique().tolist():
            as_a = model_a == model
            as_b = model_b == model
            stats["model_appearances"][model] = int(as_a.sum() + as_b.sum())
            # A model wins as "left" when shown as model_a, "right" as model_b.
            stats["model_wins"][model] = int(
                (as_a & left_won).sum() + (as_b & right_won).sum()
            )

        # Process feedback: each cell is expected to hold a JSON-encoded list.
        feedback_frequency = stats["feedback_frequency"]
        for feedback_json in df['feedback']:
            try:
                feedback_list = json.loads(feedback_json)
            except (TypeError, ValueError):
                # Missing cell (NaN) or malformed JSON: skip this row only.
                continue
            if not isinstance(feedback_list, list):
                # A bare string/number/object is not a list of options.
                continue
            for feedback in feedback_list:
                feedback_frequency[feedback] = feedback_frequency.get(feedback, 0) + 1

        return stats
    except Exception as e:
        return {"error": f"Error analyzing vote data: {e}"}