import json
import os
import logging
from typing import List, Dict, Any

import numpy as np
import pandas as pd
from huggingface_hub import HfApi

from src.display.formatting import make_clickable_model
from src.leaderboard.read_evals import get_raw_eval_results
from src.config import RESULTS_REPO, QUEUE_REPO

logger = logging.getLogger(__name__)


def get_leaderboard_df(cols: List[str], benchmark_cols: List[str]) -> pd.DataFrame:
    """Creates a dataframe from all the individual experiment results."""
    logger.info(f"Fetching evaluation results from {RESULTS_REPO}")
    try:
        # Load the dataset directly
        from datasets import load_dataset

        dataset = load_dataset(RESULTS_REPO, split="train")
        logger.debug(f"Loaded dataset with {len(dataset)} rows")
        logger.debug(f"Dataset features: {dataset.features}")

        # Convert dataset to a list of dicts
        all_data_json = [
            {
                "model_id": row["model_id"],
                "revision": row["revision"],
                "precision": row["precision"],
                "security_score": row["security_score"],
                "safetensors_compliant": row["safetensors_compliant"],
            }
            for row in dataset
        ]
        logger.debug(f"Converted dataset to: {json.dumps(all_data_json, indent=2)}")
    except Exception as e:
        logger.error(f"Error loading dataset from {RESULTS_REPO}: {str(e)}", exc_info=True)
        return pd.DataFrame(columns=cols)  # Return empty DataFrame on error

    logger.info(f"Fetched {len(all_data_json)} results")
    logger.debug(f"Data before DataFrame creation: {all_data_json}")

    if not all_data_json:
        logger.warning("No valid data found!")
        return pd.DataFrame(columns=cols)

    df = pd.DataFrame(all_data_json)
    logger.info(f"Created DataFrame with columns: {df.columns.tolist()}")
    logger.debug(f"DataFrame before filtering:\n{df}")

    # Ensure all required display columns exist
    for col in cols:
        if col not in df.columns:
            logger.info(f"Adding missing column: {col}")
            df[col] = None

    # Map dataset columns to display columns
    column_mapping = {
        "model_id": "Model",
        "security_score": "Security Score ⬆️",
        "safetensors_compliant": "Safetensors",
        "precision": "Precision",
    }
    for src, dst in column_mapping.items():
        if src in df.columns:
            df[dst] = df[src]
            logger.debug(f"Mapped column {src} to {dst}")

    # Sort by Security Score if available
    if "Security Score ⬆️" in df.columns and not df["Security Score ⬆️"].isnull().all():
        df = df.sort_values(by="Security Score ⬆️", ascending=False)
        logger.info("DataFrame sorted by Security Score")
    else:
        logger.warning("Security Score column not found or all values are null, skipping sorting")

    # Make model names clickable
    if "Model" in df.columns:
        df["Model"] = df["Model"].apply(make_clickable_model)

    # Select only the columns we want to display
    df = df[cols]

    # Round numeric columns
    numeric_cols = df.select_dtypes(include=[np.number]).columns
    for col in numeric_cols:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    df[numeric_cols] = df[numeric_cols].round(decimals=2)

    logger.debug(f"DataFrame after column selection and rounding:\n{df}")
    logger.info(f"Final DataFrame has {len(df)} rows")
    return df


def get_evaluation_queue_df(cols: list) -> tuple[pd.DataFrame, pd.DataFrame, pd.DataFrame]:
    """Creates the finished, running, and pending dataframes for the evaluation queue requests."""
    logger.info(f"Looking for eval requests in {QUEUE_REPO}")
    all_evals = []
    api = HfApi()

    try:
        # List all files in the repository and keep only the JSON request files
        files = api.list_repo_files(repo_id=QUEUE_REPO, repo_type="dataset")
        json_files = [f for f in files if f.endswith(".json")]

        for file in json_files:
            try:
                # Download and read each JSON file
                content = api.hf_hub_download(repo_id=QUEUE_REPO, filename=file, repo_type="dataset")
                logger.info(f"Reading JSON file: {file}")
                with open(content, "r") as fp:
                    data = json.load(fp)

                if isinstance(data, list):
                    # Multiple requests in one file
                    for item in data:
                        all_evals.append(format_eval_data(item))
                else:
                    # Single request in the file
                    all_evals.append(format_eval_data(data))
            except Exception as e:
                logger.error(f"Error processing file {file}: {str(e)}", exc_info=True)
    except Exception as e:
        logger.error(f"Error fetching requests from {QUEUE_REPO}: {str(e)}", exc_info=True)

    logger.info(f"Found {len(all_evals)} total eval requests")

    pending_list = [e for e in all_evals if e["status"] in ["PENDING", "RERUN"]]
    running_list = [e for e in all_evals if e["status"] == "RUNNING"]
    finished_list = [e for e in all_evals if e["status"].startswith("FINISHED") or e["status"] == "PENDING_NEW_EVAL"]
    logger.info(f"Pending: {len(pending_list)}, Running: {len(running_list)}, Finished: {len(finished_list)}")

    df_pending = pd.DataFrame.from_records(pending_list, columns=cols)
    df_running = pd.DataFrame.from_records(running_list, columns=cols)
    df_finished = pd.DataFrame.from_records(finished_list, columns=cols)
    return df_finished[cols], df_running[cols], df_pending[cols]


def format_eval_data(data: dict) -> dict:
    """Format the evaluation data into the structure expected by the queue dataframes."""
    model_name = data.get("model", "")
    return {
        "model": make_clickable_model(model_name),
        "model_raw": model_name,  # Raw model name, without the clickable markup
        "revision": data.get("revision", "main"),
        "private": data.get("private", False),
        "precision": data.get("precision", ""),
        "weight_type": data.get("weight_type", ""),
        "model_type": data.get("model_type", ""),
        "status": data.get("status", ""),
    }