Spaces:

tfrere
/

leaderboard-parser-agent

Build error

File size: 3,643 Bytes

#!/usr/bin/env python
"""
Main script for the leaderboard parser.
This script processes the leaderboards listed in the
data/best_model_for_category_list.json file by matching their UIDs with the
hosts in data/final_leaderboards.json.

Environment variables:
    HUGGING_FACE_HUB_TOKEN: Authentication token for Hugging Face Hub (required)
    HUGGING_FACE_STORAGE_REPO: Target dataset name on the Hub (optional, default: leaderboard-explorer/leaderboard_explorer)
    LEADERBOARD_REPROCESS_INTERVAL_HOURS: Interval in hours between leaderboard processing runs (default: 24)
"""
import argparse
import logging
from dotenv import load_dotenv
import uvicorn
import sys
import os

# Import from src modules
from src.processor import process_leaderboards
from src.server import app, initialize_server
from src.scheduler import initialize_scheduler, start_scheduler

# Root logging setup: INFO and above, each record prefixed with timestamp,
# logger name and level.
_LOG_FORMAT = "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
logging.basicConfig(format=_LOG_FORMAT, level=logging.INFO)

# Logger used by this script.
logger = logging.getLogger("leaderboard-parser")

def main() -> int:
    """
    Parse the command line and run the leaderboard parser once or as a server.

    With ``--server`` the work is delegated to ``run_server_mode``; otherwise
    ``process_leaderboards`` is called a single time with the parsed options
    converted to a dictionary, and its message is logged.

    Returns:
        Process exit status. 0 when ``process_leaderboards`` reports success,
        1 when it reports failure. In server mode the value returned by
        ``run_server_mode`` is passed through, with ``None`` treated as 0.
    """
    # Parse command line arguments
    parser = argparse.ArgumentParser(description="Leaderboard Parser")
    parser.add_argument("--clean", action="store_true", help="Clean the results file before starting")
    parser.add_argument("--force-retry-uid", help="Force retry for a specific leaderboard UID")
    parser.add_argument("--force-retry-category", help="Force retry for all leaderboards of a specific category")
    parser.add_argument("--upload-only", action="store_true", help="Only upload local files to the Hub without processing leaderboards")
    parser.add_argument("--local-only", action="store_true", help="Local mode only: do not download from the Hub and do not upload to the Hub")
    parser.add_argument("--ignore-cooldown", action="store_true", help="Force reprocessing of rejected leaderboards even if it's been less than 24h")
    parser.add_argument("--server", action="store_true", help="Run as a web server with scheduled processing")
    args = parser.parse_args()

    # Load environment variables from a .env file, if any
    load_dotenv()

    # Server mode: hand over to the long-running server loop.
    if args.server:
        # Always give the caller an int exit status: a missing (None) return
        # value from the server loop counts as success, a non-zero one is
        # propagated so failures are visible in the process exit code.
        return run_server_mode(args) or 0

    # One-shot mode: process the leaderboards with the options as a dict.
    success, message = process_leaderboards(vars(args))

    if success:
        logger.info(message)
        return 0

    logger.error(message)
    return 1

def run_server_mode(args: argparse.Namespace) -> int:
    """Run the application in server mode with periodic processing.

    Registers ``process_leaderboards`` with the server and the scheduler,
    starts the scheduler, then serves ``app`` with uvicorn on the port given
    by the ``PORT`` environment variable (7860 when unset).

    Args:
        args: Parsed command line options; converted to a dictionary and
            passed to ``initialize_scheduler``.

    Returns:
        0 when the server returns normally or is stopped with Ctrl+C,
        1 when reading the port or running the server raised an exception.
    """
    # Convert command line arguments to dictionary
    args_dict = vars(args)

    # Initialize server and scheduler with the process_leaderboards function
    initialize_server(process_leaderboards)
    initialize_scheduler(process_leaderboards, args_dict)

    # Start the scheduler; its return value is not needed here.
    start_scheduler()

    try:
        # Log startup information
        logger.info("Running in server mode with periodic processing")

        # Get port from environment variable or use default HF Spaces port
        port = int(os.environ.get("PORT", 7860))
        logger.info(f"Starting server on port {port}")

        # Run the FastAPI server (blocks until the server stops)
        uvicorn.run(app, host="0.0.0.0", port=port)
    except KeyboardInterrupt:
        logger.info("Server stopped by user")
    except Exception as e:
        # Keep the original message but attach the traceback, and report the
        # failure to the caller instead of ending as if nothing went wrong.
        logger.exception(f"Error running server: {e}")
        return 1

    return 0

# Script entry point: run main() and use its result as the process exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)