File size: 4,453 Bytes
a24f6cf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
import os
import csv
import json
from datetime import datetime
import pandas as pd

def save_vote_details(example, model_a, model_b, winner, feedback, summary_a, summary_b):
    """
    Save detailed vote information to CSV file for future analysis.

    The record is appended to utils/vote_details.csv (the directory is created
    if needed). After every 10th recorded vote a timestamped copy of the file
    is written next to it. Failures are reported with print() and never raised.

    Parameters:
    - example: Mapping with the question and context information; the keys
      "id", "question" and "insufficient" are read via .get()
    - model_a, model_b: Names of models being compared
    - winner: 'left', 'right', 'tie', or 'neither' indicating the vote result
    - feedback: List of feedback options selected by the user (stored as JSON)
    - summary_a, summary_b: The model outputs (summaries)

    Returns:
    - None
    """
    # Prepare the vote details record
    vote_record = {
        "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
        "model_a": model_a,
        "model_b": model_b,
        "winner": winner,
        "feedback": json.dumps(feedback),
        "question_id": example.get("id", "unknown"),
        "question": example.get("question", ""),
        "insufficient_context": example.get("insufficient", False),
        "summary_a": summary_a,
        "summary_b": summary_b
    }

    # Define the path to the CSV file
    csv_dir = 'utils'
    csv_path = os.path.join(csv_dir, 'vote_details.csv')

    try:
        # The target directory may not exist yet on a fresh checkout
        os.makedirs(csv_dir, exist_ok=True)

        # An existing but empty file still needs its header row
        needs_header = not os.path.exists(csv_path) or os.path.getsize(csv_path) == 0

        # Open the file in append mode
        with open(csv_path, 'a', newline='', encoding='utf-8') as f:
            writer = csv.DictWriter(f, fieldnames=list(vote_record.keys()))

            if needs_header:
                writer.writeheader()

            # Write the vote record
            writer.writerow(vote_record)

        print(f"Vote details saved to {csv_path}")
    except Exception as e:
        print(f"Error saving vote details: {e}")
        # Nothing new was stored, so re-checking the backup condition would
        # only produce duplicate backups of an unchanged file.
        return

    # Create a backup copy every 10 votes
    try:
        # Count CSV records rather than physical lines: summaries may contain
        # newlines, which span several lines inside one quoted field.
        with open(csv_path, 'r', newline='', encoding='utf-8') as f:
            num_votes = sum(1 for _ in csv.reader(f)) - 1  # Subtract 1 for header

        if num_votes > 0 and num_votes % 10 == 0:
            backup_path = os.path.join(csv_dir, f'vote_details_backup_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv')
            # Binary copy keeps the backup byte-identical (no newline translation)
            with open(csv_path, 'rb') as src, open(backup_path, 'wb') as dst:
                dst.write(src.read())
            print(f"Created backup at {backup_path}")
    except Exception as e:
        print(f"Error creating backup: {e}")

def get_vote_statistics():
    """
    Analyze vote details and provide statistics.

    Reads utils/vote_details.csv and aggregates it.

    Returns:
    - Dictionary of statistics about votes with the keys "total_votes",
      "winner_distribution" (counts for 'left', 'right', 'tie', 'neither'),
      "model_appearances", "model_wins" (a win is 'left' for model_a or
      'right' for model_b) and "feedback_frequency".
    - {"error": <message>} when the file is missing or cannot be analyzed.
    """
    csv_path = os.path.join('utils', 'vote_details.csv')

    if not os.path.exists(csv_path):
        return {"error": "No vote data available"}

    try:
        # Read the CSV into a DataFrame
        df = pd.read_csv(csv_path)
        winners = df['winner']

        # Basic statistics
        stats = {
            "total_votes": len(df),
            "winner_distribution": {
                label: int((winners == label).sum())
                for label in ('left', 'right', 'tie', 'neither')
            },
            "model_appearances": {},
            "model_wins": {},
            "feedback_frequency": {}
        }

        # Count model appearances and wins. Missing model names (NaN) are
        # dropped: they never compare equal to anything and would only add
        # meaningless zero-count entries.
        models = pd.concat([df['model_a'], df['model_b']]).dropna().unique().tolist()
        for model in models:
            shown_as_a = df['model_a'] == model
            shown_as_b = df['model_b'] == model
            stats["model_appearances"][model] = int(shown_as_a.sum() + shown_as_b.sum())
            stats["model_wins"][model] = int(
                (shown_as_a & (winners == 'left')).sum()
                + (shown_as_b & (winners == 'right')).sum()
            )

        # Process feedback
        frequency = stats["feedback_frequency"]
        for feedback_json in df['feedback']:
            try:
                parsed = json.loads(feedback_json)
            except (TypeError, ValueError):
                # Empty cells (NaN) or malformed JSON: skip this row only
                continue

            if isinstance(parsed, str):
                # A lone string is one option, not a sequence of characters
                parsed = [parsed]
            elif not isinstance(parsed, list):
                # null, numbers, objects: not a list of feedback options
                continue

            for option in parsed:
                frequency[option] = frequency.get(option, 0) + 1

        return stats

    except Exception as e:
        return {"error": f"Error analyzing vote data: {e}"}