import logging
import re

import gradio as gr
import pandas as pd

from results import results

logging.basicConfig(level=logging.DEBUG)
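# The expected shape of `results` (imported from results.py) is inferred from
# how the functions below read it; the entry sketched here is hypothetical,
# not actual data. Each model carries a list of benchmark configurations,
# either flat or with a nested per-quantization "configurations" list:
#
# results = {
#     "models": [
#         {
#             "name": "example-model",       # hypothetical model name
#             "modelType": "example-arch",   # used to group models for merging
#             "configurations": [
#                 {
#                     "cloud": "aws",
#                     "instanceType": "g5.2xlarge",
#                     "gpu": "NVIDIA A10G",
#                     "gpuRAM": "24 GB",
#                     "status": "OK",          # "OK" or "KO"
#                     "quantization": "none",
#                     "container": "TGI",      # falls back to a "tgi" key
#                     "tokensPerSecond": 42,
#                     "notes": "",
#                 },
#             ],
#         },
#     ],
# }
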
def get_model_names():
    """
    Retrieve a sorted list of model names from the results data.

    Returns:
        list: Sorted list of model names.
    """
    return sorted([model['name'] for model in results['models']])


def get_models_by_architecture(model_name):
    """
    Retrieve models with the same architecture as the specified model.

    Args:
        model_name (str): Name of the model to match architecture.

    Returns:
        list: List of models with the same architecture.
    """
    selected_model = next((m for m in results['models'] if m['name'] == model_name), None)
    if not selected_model:
        return []

    model_type = selected_model.get('modelType', '')
    return [m for m in results['models'] if m.get('modelType', '') == model_type]


def custom_sort_key(instance_type):
    """
    Generate a custom sorting key for instance types.

    Args:
        instance_type (str): The instance type to generate a key for.

    Returns:
        tuple: A tuple used for sorting, containing (family, size_index).
    """
    size_order = ['xlarge', '2xlarge', '4xlarge', '8xlarge', '12xlarge',
                  '16xlarge', '24xlarge', '48xlarge']
    match = re.match(r'([a-z]+\d+)\.(\w+)', instance_type)
    if match:
        family, size = match.groups()
        return (family, size_order.index(size) if size in size_order else len(size_order))
    return (instance_type, 0)  # Fallback for non-standard instance types


def display_results(model_name):
    """
    Process and display results for a given model.

    Args:
        model_name (str): Name of the model to display results for.

    Returns:
        tuple: A tuple containing:
            - str: Markdown formatted string with model information.
            - pandas.DataFrame: Styled DataFrame with the results.
    """
    try:
        models = get_models_by_architecture(model_name)
        if not models:
            logging.warning(f"No models found for {model_name}")
            return f"No results found for the selected model: {model_name}", pd.DataFrame()

        model_type = models[0].get('modelType', 'N/A')
        data = {}
        merged_models = set()

        for model in models:
            merged_models.add(model.get('name', 'Unknown'))
            for config in model.get('configurations', []):
                try:
                    cloud = config.get('cloud', 'N/A')
                    instance_type = config.get('instanceType', 'N/A')
                    key = (cloud, instance_type)

                    if 'configurations' in config:
                        # Nested form: one row per quantization, inheriting
                        # GPU details from the parent configuration
                        for nested_config in config['configurations']:
                            nested_key = key + (nested_config.get('quantization', 'N/A'),)
                            data[nested_key] = {
                                "Cloud": cloud,
                                "Instance Type": instance_type,
                                "GPU": config.get('gpu', 'N/A'),
                                "GPU RAM": config.get('gpuRAM', 'N/A'),
                                "Status": nested_config.get('status', 'N/A'),
                                "Quantization": nested_config.get('quantization', 'N/A'),
                                "Container": nested_config.get('container', nested_config.get('tgi', 'N/A')),
                                "Tokens per Second": nested_config.get('tokensPerSecond', 'N/A'),
                                "Notes": nested_config.get('notes', ''),
                            }
                    else:
                        # Flat form: append len(data) so each configuration
                        # gets a unique key
                        unique_key = key + (config.get('quantization', 'N/A'), len(data))
                        data[unique_key] = {
                            "Cloud": cloud,
                            "Instance Type": instance_type,
                            "GPU": config.get('gpu', 'N/A'),
                            "GPU RAM": config.get('gpuRAM', 'N/A'),
                            "Status": config.get('status', 'N/A'),
                            "Quantization": config.get('quantization', 'N/A'),
                            "Container": config.get('container', config.get('tgi', 'N/A')),
                            "Tokens per Second": config.get('tokensPerSecond', 'N/A'),
                            "Notes": config.get('notes', ''),
                        }
                except Exception as e:
                    logging.error(f"Error processing configuration: {e}")
                    continue

        if not data:
            logging.warning(f"No data extracted for {model_name}")
            return f"No data could be extracted for the selected model: {model_name}", pd.DataFrame()

        # Merge data across entries: fill 'N/A' fields with a known value
        # from another entry on the same cloud
        for key, value in data.items():
            for field in value:
                if value[field] == 'N/A':
                    for other_key, other_value in data.items():
                        if other_key[0] == key[0] and other_value[field] != 'N/A':
                            value[field] = other_value[field]
                            break

        # Filter out rows where Status is 'N/A'
        data = {k: v for k, v in data.items() if v['Status'] != 'N/A'}
        if not data:
            logging.warning(f"No configurations with a status for {model_name}")
            return f"No benchmark results with a status for the selected model: {model_name}", pd.DataFrame()

        merged_models_message = (
            f"Note: Results merged from models: {', '.join(merged_models)}"
            if len(merged_models) > 1 else None
        )

        # Sort the data by instance type
        sorted_data = sorted(data.values(), key=lambda x: custom_sort_key(x['Instance Type']))

        # Use a dedicated name so we don't shadow the imported `results` data
        result_text = f"## Results for {model_name}\n\nModel Type: {model_type}"
        if merged_models_message:
            result_text += f"\n\n{merged_models_message}"

        df = pd.DataFrame(sorted_data)

        def color_status(val):
            if val == 'OK':
                return 'background-color: green; color: white'
            elif val == 'KO':
                return 'background-color: red; color: white'
            else:
                return ''

        styled_df = df.style.applymap(color_status, subset=['Status'])

        return result_text, styled_df

    except Exception as e:
        logging.exception(f"Error in display_results: {e}")
        return f"An error occurred while processing results for {model_name}: {str(e)}", pd.DataFrame()


with gr.Blocks() as demo:
    gr.Markdown("# Model Benchmark Results")
    gr.Markdown(
        "This table shows the benchmark results for each model. "
        "Container settings ([TGI](https://huggingface.co/docs/text-generation-inference/reference/launcher), "
        "[vLLM](https://docs.djl.ai/master/docs/serving/serving/docs/lmi/user_guides/vllm_user_guide.html), "
        "etc.) are default unless noted."
    )
    model_dropdown = gr.Dropdown(choices=get_model_names(), label="Select Model")
    results_text = gr.Markdown()
    results_output = gr.DataFrame(label="Results")

    model_dropdown.change(
        display_results,
        inputs=[model_dropdown],
        outputs=[results_text, results_output]
    )

if __name__ == "__main__":
    demo.launch()
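# Launch note: demo.launch() binds to Gradio's defaults (http://127.0.0.1:7860)
# unless host/port arguments are passed. Running this script directly assumes
# results.py is importable from the same directory.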