Spaces:
				
			
			
	
			
			
		Sleeping
		
	
	
	
			
			
	
	
	
	
		
		
		Sleeping
		
	File size: 3,238 Bytes
			
import csv
import os
import sys
from datetime import datetime
from ..scrape.config import FIGHTS_CSV_PATH, FIGHTERS_CSV_PATH
def load_fighters_data(path=None):
    """Load fighter ELO ratings from a CSV file into a dictionary.

    Args:
        path: CSV file to read. When omitted, ``FIGHTERS_CSV_PATH`` is used.

    Returns:
        A dict mapping each fighter's full name (``first_name`` and
        ``last_name`` joined by a space, then stripped) to ``{'elo': float}``,
        or ``None`` when the file does not exist.

    Raises:
        KeyError: If the ``first_name`` or ``last_name`` column is absent.
        ValueError: If a non-blank ``elo`` value is not a valid number.
    """
    if path is None:
        path = FIGHTERS_CSV_PATH

    if not os.path.exists(path):
        print(f"Error: Fighter data not found at '{path}'.")
        print("Please run the ELO analysis first ('python -m src.analysis.elo').")
        return None

    fighters = {}
    # newline='' lets the csv module handle embedded newlines in quoted fields.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        for row in csv.DictReader(f):
            # DictReader fills the missing cells of a short row with None.
            first_name = row['first_name'] or ''
            last_name = row['last_name'] or ''
            full_name = f"{first_name} {last_name}".strip()

            # Treat an absent column, a None cell and a blank cell all as
            # "no rating yet" and fall back to the default ELO of 1500.
            raw_elo = (row.get('elo') or '').strip()
            fighters[full_name] = {'elo': float(raw_elo) if raw_elo else 1500.0}
    return fighters
def load_fights_data(path=None):
    """Load fight rows from a CSV file and order them chronologically.

    Args:
        path: CSV file to read. When omitted, ``FIGHTS_CSV_PATH`` is used.

    Returns:
        A list of row dicts sorted by ``event_date`` (parsed with the
        ``'%B %d, %Y'`` format, e.g. ``"March 05, 2021"``), oldest first, or
        ``None`` when the file does not exist. Rows whose ``event_date`` is
        missing, blank or not in that format are kept, in file order, after
        all dated rows, and a warning with their count is printed.
    """
    if path is None:
        path = FIGHTS_CSV_PATH

    if not os.path.exists(path):
        print(f"Error: Fights data not found at '{path}'.")
        return None

    # newline='' lets the csv module handle embedded newlines in quoted fields.
    with open(path, 'r', encoding='utf-8', newline='') as f:
        fights = list(csv.DictReader(f))

    dated = []
    undated = []
    for fight in fights:
        raw_date = (fight.get('event_date') or '').strip()
        try:
            event_date = datetime.strptime(raw_date, '%B %d, %Y')
        except ValueError:
            # One bad cell should not abort the whole load.
            undated.append(fight)
        else:
            dated.append((event_date, fight))

    # Sort fights chronologically to ensure a proper train/test split later.
    # list.sort is stable, so same-day fights keep their file order.
    dated.sort(key=lambda pair: pair[0])

    if undated:
        print(
            f"Warning: {len(undated)} fight(s) have a missing or unparseable "
            "event_date and were placed after all dated fights."
        )
    return [fight for _, fight in dated] + undated
def run_elo_baseline_model(fights, fighters):
    """Score a baseline that predicts the higher-ELO fighter as the winner.

    Args:
        fights: Iterable of mappings with ``'fighter_1'``, ``'fighter_2'`` and
            ``'winner'`` keys.
        fighters: Mapping of fighter name to a dict that may hold an ``'elo'``
            value (1500 is assumed when the key is absent).

    Returns:
        A ``(accuracy, total_predictions)`` tuple, where ``accuracy`` is the
        percentage (0-100) of scored fights predicted correctly and
        ``total_predictions`` is the number of fights that were scored.
        ``accuracy`` is 0.0 when no fight could be scored.

    A fight is scored only when its recorded winner is one of its two
    fighters and both fighters are present in ``fighters``. On equal ELO the
    prediction goes to ``fighter_2``.

    NOTE(review): if ``fighters`` holds ratings that were computed from these
    same fights, the reported accuracy is optimistic -- confirm against the
    ELO pipeline.
    """
    correct_predictions = 0
    total_predictions = 0

    for fight in fights:
        # Cells may be None (short CSV rows) or padded with whitespace.
        fighter1_name = (fight['fighter_1'] or '').strip()
        fighter2_name = (fight['fighter_2'] or '').strip()
        actual_winner = (fight['winner'] or '').strip()

        # Only fights won by one of the two listed fighters can be scored.
        # This covers draws, no contests and blanks as well as any other
        # non-name label, which would otherwise count as a wrong prediction.
        if not actual_winner or actual_winner not in (fighter1_name, fighter2_name):
            continue

        fighter1 = fighters.get(fighter1_name)
        fighter2 = fighters.get(fighter2_name)
        if not fighter1 or not fighter2:
            continue  # Skip if fighter data is missing

        elo1 = fighter1.get('elo', 1500)
        elo2 = fighter2.get('elo', 1500)

        # Predict winner based on higher ELO (ties go to fighter_2).
        predicted_winner = fighter1_name if elo1 > elo2 else fighter2_name

        if predicted_winner == actual_winner:
            correct_predictions += 1
        total_predictions += 1

    if total_predictions == 0:
        return 0.0, 0
    return (correct_predictions / total_predictions) * 100, total_predictions
def main():
    """Load the fighter and fight data, then report the ELO baseline's accuracy.

    Both loaders are always called, so each one can print its own error
    message. The pipeline stops early when either returns nothing usable
    (``None`` or an empty collection).
    """
    print("--- Starting ML Prediction Pipeline ---")

    # Tuple elements are evaluated left to right: fighters first, then fights.
    ratings, bouts = load_fighters_data(), load_fights_data()
    if not (ratings and bouts):
        print("Aborting pipeline due to missing data.")
        return

    print("\nRunning Baseline Model (Predicting winner by highest ELO)...")
    accuracy, total_fights = run_elo_baseline_model(bouts, ratings)

    report_lines = (
        "\n--- Baseline Model Evaluation ---",
        f"Total Fights Evaluated: {total_fights}",
        f"Model Accuracy: {accuracy:.2f}%",
        "---------------------------------",
    )
    for line in report_lines:
        print(line)
# Run the pipeline only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()