import gradio as gr
import os
from PIL import Image
import numpy as np
import pickle
import io
import sys
import torch
import subprocess
import h5py
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.metrics import f1_score
import seaborn as sns


#################### BEAM PREDICTION #########################
def beam_prediction_task(data_percentage, task_complexity):
    """Build the raw and embeddings confusion-matrix images for the beam task.

    Args:
        data_percentage: Training-data percentage; it is divided by 100 and
            formatted with one decimal to form the folder names.
        task_complexity: Number of beams; used in the folder names and titles.

    Returns:
        A ``(raw_image, embeddings_image)`` tuple. An entry is ``None`` when
        ``compute_average_confusion_matrix`` returns ``None`` for its folder.
    """
    # Folder naming convention: images/<kind>_<fraction>_<task_complexity>
    raw_folder = f"images/raw_{data_percentage/100:.1f}_{task_complexity}"
    embeddings_folder = f"images/embedding_{data_percentage/100:.1f}_{task_complexity}"

    # (results folder, output image file name, plot title), raw first.
    jobs = (
        (
            raw_folder,
            "confusion_matrix_raw.png",
            f"Raw Confusion Matrix ({data_percentage}% data, {task_complexity} beams)",
        ),
        (
            embeddings_folder,
            "confusion_matrix_embeddings.png",
            f"Embeddings Confusion Matrix ({data_percentage}% data, {task_complexity} beams)",
        ),
    )

    rendered = []
    for folder, image_name, plot_title in jobs:
        matrix = compute_average_confusion_matrix(folder)
        if matrix is None:
            rendered.append(None)
            continue

        image_path = os.path.join(folder, image_name)
        plot_confusion_matrix_beamPred(
            matrix,
            classes=np.arange(matrix.shape[0]),
            title=plot_title,
            save_path=image_path,
        )
        rendered.append(Image.open(image_path))

    raw_img, embeddings_img = rendered
    return raw_img, embeddings_img

from sklearn.metrics import f1_score

# Function to compute the F1-score based on the confusion matrix
def _safe_ratio(numerator, denominator):
    """Divide element-wise, yielding 0 wherever the denominator is 0."""
    result = np.zeros_like(numerator, dtype=float)
    np.divide(numerator, denominator, out=result, where=denominator != 0)
    return result


def compute_f1_score(cm):
    """Return the mean per-class F1 score derived from a confusion matrix.

    Rows of ``cm`` are treated as true classes and columns as predicted
    classes. Precision, recall and F1 of a class are taken as 0 whenever
    their denominator is 0, so no divide-by-zero warnings are emitted.

    Args:
        cm: Square confusion matrix (array-like of counts).

    Returns:
        The unweighted mean of the per-class F1 scores; ``0.0`` for an empty
        matrix.
    """
    cm = np.asarray(cm, dtype=float)
    if cm.size == 0:
        return 0.0

    true_positives = np.diag(cm)
    predicted_totals = cm.sum(axis=0)  # TP + FP for each class
    actual_totals = cm.sum(axis=1)     # TP + FN for each class

    precision = _safe_ratio(true_positives, predicted_totals)
    recall = _safe_ratio(true_positives, actual_totals)

    f1 = _safe_ratio(2 * precision * recall, precision + recall)
    return np.mean(f1)  # Mean F1-score across all classes

# Function to plot and save confusion matrix with F1-score in the title
def plot_confusion_matrix_beamPred(cm, classes, title, save_path):
    """Plot a confusion matrix as a heat map and save it to ``save_path``.

    The average F1-score computed from ``cm`` is appended to the title.

    Args:
        cm: Confusion matrix to draw.
        classes: Tick labels, one per row/column of ``cm``.
        title: Base title of the plot.
        save_path: Path of the image file to write.
    """
    # Compute the average F1-score and include it in the title
    avg_f1 = compute_f1_score(cm)
    full_title = f"{title} (Avg F1-Score: {avg_f1:.2f})"

    fig = plt.figure(figsize=(8, 6))
    try:
        plt.imshow(cm, interpolation='nearest', cmap='coolwarm')
        plt.title(full_title)
        plt.colorbar()

        tick_marks = np.arange(len(classes))
        plt.xticks(tick_marks, classes, rotation=45)
        plt.yticks(tick_marks, classes)

        plt.ylabel('True label')
        plt.xlabel('Predicted label')

        # Run the layout only after every label exists; otherwise the axis
        # labels are not taken into account and may be clipped.
        plt.tight_layout()
        plt.savefig(save_path)
    finally:
        # Release the figure even when drawing or saving fails.
        plt.close(fig)

def compute_average_confusion_matrix(folder):
    """Average the confusion matrices of all prediction CSV files in ``folder``.

    Every ``*.csv`` file is expected to provide the columns ``"Target"`` and
    ``"Top-1 Prediction"``. All matrices are computed over the label set
    ``0 .. max_num_labels - 1``, where ``max_num_labels`` is the largest number
    of distinct targets found in any single file, so they share one shape.

    Args:
        folder: Directory containing the prediction CSV files.

    Returns:
        The element-wise mean confusion matrix, or ``None`` when the folder
        does not exist or contains no CSV file.
    """
    try:
        csv_names = sorted(name for name in os.listdir(folder) if name.endswith(".csv"))
    except (FileNotFoundError, NotADirectoryError):
        # No results for this configuration; the caller treats None as "no image".
        return None

    if not csv_names:
        return None

    # Read every file once and reuse the frames for both passes.
    frames = [pd.read_csv(os.path.join(folder, name)) for name in csv_names]

    # NOTE(review): the matrix size is the count of distinct targets, which
    # assumes the targets are the contiguous integers 0..k-1 — confirm.
    max_num_labels = max(len(np.unique(frame["Target"])) for frame in frames)
    label_range = np.arange(max_num_labels)

    # Passing an explicit label list fixes the shape of every matrix to
    # (max_num_labels, max_num_labels), so no padding is required.
    confusion_matrices = [
        confusion_matrix(frame["Target"], frame["Top-1 Prediction"], labels=label_range)
        for frame in frames
    ]
    return np.mean(confusion_matrices, axis=0)

########################## LOS/NLOS CLASSIFICATION #############################


# Root folder of the predefined LoS/NLoS results; the per-percentage result
# folders are looked up underneath it.
LOS_PATH = "images_LoS"

# Training-data percentages selectable in the LoS/NLoS tab: 20 evenly spaced
# values from 0.1 to 100 (in percent). Code in this file addresses them by
# position through a ``percentage_idx``.
percentage_values_los = np.linspace(0.001, 1, 20) * 100  # 20 percentage values

# Metric and plotting imports used by the LoS/NLoS confusion-matrix plotting
from sklearn.metrics import f1_score
import seaborn as sns

# Function to compute confusion matrix, F1-score and plot it with dark mode style
def plot_confusion_matrix_from_csv(csv_file_path, title, save_path):
    """Plot the confusion matrix of a prediction CSV in dark-mode style.

    The CSV must provide the columns ``'Target'`` and ``'Top-1 Prediction'``.
    The macro-averaged F1 score is appended to the title.

    Args:
        csv_file_path: Path of the CSV file with targets and predictions.
        title: Base title of the plot.
        save_path: Path of the image file to write.

    Returns:
        The saved plot as a fully loaded PIL image.
    """
    data = pd.read_csv(csv_file_path)

    # Extract ground truth and predictions
    y_true = data['Target']
    y_pred = data['Top-1 Prediction']

    # Sorted union of the labels occurring in targets or predictions. Passing
    # it explicitly keeps the matrix rows/columns and the tick labels in sync.
    class_labels = np.unique(np.concatenate([y_true.to_numpy(), y_pred.to_numpy()]))
    cm = confusion_matrix(y_true, y_pred, labels=class_labels)

    f1 = f1_score(y_true, y_pred, average='macro')  # Macro-average F1-score

    tick_positions = np.arange(len(class_labels)) + 0.5  # centres of the heat-map cells
    tick_labels = [f"Class {label}" for label in class_labels]

    # Apply the dark style only to this figure; plt.style.use() would switch
    # the style for every plot created afterwards.
    with plt.style.context('dark_background'):
        fig = plt.figure(figsize=(5, 5))
        try:
            sns.heatmap(cm, annot=True, fmt="d", cmap="magma", cbar=False,
                        annot_kws={"size": 12}, linewidths=0.5, linecolor='white')

            plt.title(f"{title} (F1 Score: {f1:.3f})", color="white", fontsize=14)

            plt.xticks(tick_positions, labels=tick_labels, color="white", fontsize=10)
            plt.yticks(tick_positions, labels=tick_labels, color="white", fontsize=10)

            plt.ylabel('True label', color="white", fontsize=12)
            plt.xlabel('Predicted label', color="white", fontsize=12)
            plt.tight_layout()

            # Transparent background so the image blends with a dark page
            plt.savefig(save_path, transparent=True)
        finally:
            plt.close(fig)

    # Read the pixels now: Image.open() alone is lazy, and a later call may
    # overwrite the same file before the image is consumed.
    image = Image.open(save_path)
    image.load()
    return image

# Function to load confusion matrix based on percentage and input_type
def display_confusion_matrices_los(percentage_idx):
    """Plot the predefined raw and embeddings LoS/NLoS confusion matrices.

    Args:
        percentage_idx: Index into ``percentage_values_los`` selecting the
            training-data percentage.

    Returns:
        A ``(raw_image, embeddings_image)`` tuple of PIL images.
    """
    percentage = percentage_values_los[percentage_idx]

    # Result folders are named after the training fraction with three decimals.
    raw_folder = os.path.join(LOS_PATH, f"raw_{percentage/100:.3f}_los_noTraining")
    embeddings_folder = os.path.join(LOS_PATH, f"embedding_{percentage/100:.3f}_los_noTraining")

    # (prediction CSV, plot title, output image path), raw first.
    plot_jobs = (
        (
            os.path.join(raw_folder, f"test_predictions_raw_{percentage/100:.3f}_los.csv"),
            f"Raw Confusion Matrix ({percentage:.1f}% data)",
            os.path.join(raw_folder, "confusion_matrix_raw.png"),
        ),
        (
            os.path.join(embeddings_folder, f"test_predictions_embedding_{percentage/100:.3f}_los.csv"),
            f"Embeddings Confusion Matrix ({percentage:.1f}% data)",
            os.path.join(embeddings_folder, "confusion_matrix_embeddings.png"),
        ),
    )

    images = [
        plot_confusion_matrix_from_csv(csv_path, plot_title, image_path)
        for csv_path, plot_title, image_path in plot_jobs
    ]
    raw_img, embeddings_img = images
    return raw_img, embeddings_img

# Main function to handle user choice
def handle_user_choice(choice, percentage_idx=None, uploaded_file=None):
    """Dispatch the LoS/NLoS tab request according to the selected data source.

    Args:
        choice: ``"Use Default Dataset"`` or ``"Upload Dataset"``.
        percentage_idx: Index of the selected training-data percentage.
        uploaded_file: Uploaded dataset file, or ``None`` when nothing was uploaded.

    Returns:
        A ``(raw_image, embeddings_image, console_output)`` tuple. When the
        request cannot be served, both images are ``None`` and the reason is
        given in ``console_output``; messages are never placed in the image
        slots because those feed image components.
    """
    if choice == "Use Default Dataset":
        raw_img, embeddings_img = display_confusion_matrices_los(percentage_idx)
        return raw_img, embeddings_img, ""  # No console output for predefined results

    if choice == "Upload Dataset":
        if uploaded_file is None:
            return None, None, "Please upload a dataset"
        raw_img, embeddings_img, console_output = process_hdf5_file(uploaded_file, percentage_idx)
        return raw_img, embeddings_img, console_output

    return None, None, "Invalid choice"

# Custom class to capture print output
class PrintCapture(io.StringIO):
    """In-memory text stream that records everything written to it.

    Intended as a temporary replacement for ``sys.stdout`` so that printed
    messages can be collected and returned to the caller.
    """

    def __init__(self):
        super().__init__()
        # Written chunks in order of arrival; read back by get_output().
        self.output = []

    def write(self, txt):
        """Record ``txt`` and return the number of characters written."""
        self.output.append(txt)
        # Propagate the character count, as the text-stream interface expects.
        return super().write(txt)

    def get_output(self):
        """Return all text written so far as one string."""
        return ''.join(self.output)

# Function to load and display predefined images based on user selection
def display_predefined_images(percentage_idx):
    """Load the predefined raw and embeddings images for a percentage index.

    A random placeholder image is returned for any image file that does not
    exist.

    Args:
        percentage_idx: Index into ``percentage_values_los``.

    Returns:
        A ``(raw_image, embeddings_image)`` tuple of PIL images.
    """
    # NOTE(review): RAW_PATH and EMBEDDINGS_PATH are not defined in the part of
    # the file visible here — confirm they exist before relying on this function.
    percentage = percentage_values_los[percentage_idx]
    file_name = f"percentage_{percentage}_complexity_16.png"
    raw_image_path = os.path.join(RAW_PATH, file_name)
    embeddings_image_path = os.path.join(EMBEDDINGS_PATH, file_name)

    def _load_or_placeholder(image_path):
        # Fall back to a random image when the predefined file is missing.
        if not os.path.exists(image_path):
            return create_random_image()
        return Image.open(image_path)

    raw_image = _load_or_placeholder(raw_image_path)
    embeddings_image = _load_or_placeholder(embeddings_image_path)
    return raw_image, embeddings_image

def los_nlos_classification(file, percentage_idx):
    """Return LoS/NLoS result images for an uploaded file or the predefined data.

    Args:
        file: Uploaded dataset file, or ``None`` to use the predefined images.
        percentage_idx: Index of the selected training-data percentage.

    Returns:
        A ``(raw_image, embeddings_image, console_output)`` tuple;
        ``console_output`` is an empty string when no file was uploaded.
    """
    if file is None:
        # Nothing uploaded: show the predefined images, without console text.
        raw_image, embeddings_image = display_predefined_images(percentage_idx)
        return raw_image, embeddings_image, ""

    # Uploaded dataset: two confusion-matrix images plus the captured output.
    raw_cm_image, emb_cm_image, console_output = process_hdf5_file(file, percentage_idx)
    return raw_cm_image, emb_cm_image, console_output

# Function to create random images for LoS/NLoS classification results
def create_random_image(size=(300, 300)):
    """Return a PIL image filled with uniformly random RGB noise.

    Args:
        size: Leading dimensions of the generated pixel array; a third
            dimension of length 3 is appended for the colour channels.
    """
    noise = np.random.rand(*size, 3)
    pixels = (noise * 255).astype('uint8')
    return Image.fromarray(pixels)

import importlib.util

# Function to dynamically load a Python module from a given file path
def load_module_from_path(module_name, file_path):
    """Load and execute a Python source file as a module.

    The module's top-level code runs as part of loading. The module is not
    registered in ``sys.modules``.

    Args:
        module_name: Name to give the loaded module.
        file_path: Path of the Python file to load.

    Returns:
        The executed module object.

    Raises:
        ImportError: If no import spec or loader can be created for
            ``file_path`` (for example an unsupported file extension).
    """
    spec = importlib.util.spec_from_file_location(module_name, file_path)
    if spec is None or spec.loader is None:
        # Without this check the failure surfaces as an opaque AttributeError.
        raise ImportError(f"Cannot load module {module_name!r} from {file_path!r}")

    module = importlib.util.module_from_spec(spec)
    spec.loader.exec_module(module)
    return module

# Function to split dataset into training and test sets based on user selection
def split_dataset(channels, labels, percentage_idx):
    """Randomly split channels and labels into a training and a test part.

    The training share is ``percentage_values_los[percentage_idx]`` percent of
    the samples, rounded down; the remaining samples form the test set.

    Args:
        channels: Indexable array whose first dimension enumerates the samples.
        labels: Labels aligned with ``channels``.
        percentage_idx: Index into ``percentage_values_los``.

    Returns:
        ``(train_data, test_data, train_labels, test_labels)``.
    """
    fraction = percentage_values_los[percentage_idx] / 100
    total = channels.shape[0]
    train_size = int(total * fraction)
    print(f'Number of Training Samples: {train_size}')

    # One shuffled index order drives both the data and the label split.
    order = np.arange(total)
    np.random.shuffle(order)
    train_idx = order[:train_size]
    test_idx = order[train_size:]

    train_data = channels[train_idx]
    test_data = channels[test_idx]
    train_labels = labels[train_idx]
    test_labels = labels[test_idx]
    return train_data, test_data, train_labels, test_labels

# Function to calculate Euclidean distance between a point and a centroid
def euclidean_distance(x, centroid):
    """Return the Euclidean (L2) norm of ``x - centroid``."""
    offset = x - centroid
    return np.linalg.norm(offset)

import torch

def classify_based_on_distance(train_data, train_labels, test_data):
    """Classify test samples by their nearest class centroid.

    The centroids of the training samples labelled 0 and 1 are computed, and
    every test sample is assigned the class of the closer centroid in
    Euclidean distance. A tie, or a centroid that cannot be computed because
    the class has no training samples, results in class 1.

    Args:
        train_data: Tensor whose first dimension enumerates training samples.
        train_labels: Labels (0 or 1) aligned with ``train_data``; a tensor or
            any array-like accepted by ``torch.as_tensor``.
        test_data: Tensor whose first dimension enumerates test samples, with
            the same per-sample shape as ``train_data``.

    Returns:
        A CPU ``torch.int64`` tensor with one prediction per test sample.
    """
    # Bring the labels onto the data's device so they can be used as a mask.
    labels = torch.as_tensor(train_labels, device=train_data.device).reshape(-1)

    centroid_0 = train_data[labels == 0].mean(dim=0)
    centroid_1 = train_data[labels == 1].mean(dim=0)

    # Flatten every sample to a vector so that one batched norm covers all
    # test samples; this stays in torch and therefore on the data's device.
    feature_count = centroid_0.numel()
    flat_test = test_data.reshape(test_data.shape[0], feature_count)
    dist_0 = torch.linalg.norm(flat_test - centroid_0.reshape(1, feature_count), dim=1)
    dist_1 = torch.linalg.norm(flat_test - centroid_1.reshape(1, feature_count), dim=1)

    # Class 0 only when strictly closer to centroid 0; everything else is class 1.
    predictions = (~(dist_0 < dist_1)).to(torch.int64)
    return predictions.cpu()

# Function to generate confusion matrix plot
def plot_confusion_matrix(y_true, y_pred, title):
    """Plot an annotated confusion matrix and return it as a PIL image.

    The plot is written to ``"<title>.png"`` relative to the current working
    directory and then read back.

    Args:
        y_true: Ground-truth labels.
        y_pred: Predicted labels.
        title: Plot title; also used as the base name of the image file.

    Returns:
        The saved plot as a fully loaded PIL image.
    """
    cm = confusion_matrix(y_true, y_pred)
    image_path = f"{title}.png"

    fig = plt.figure(figsize=(5, 5))
    try:
        plt.imshow(cm, cmap='Blues')
        plt.title(title)
        plt.xlabel('Predicted')
        plt.ylabel('Actual')
        plt.colorbar()

        # Tick labels for the two classes
        plt.xticks([0, 1], labels=[0, 1])
        plt.yticks([0, 1], labels=[0, 1])

        # Annotate each cell; use white text on dark cells, black on light ones
        thresh = cm.max() / 2
        for (row, col), count in np.ndenumerate(cm):
            plt.text(col, row, format(count, 'd'),
                     ha="center", va="center",
                     color="white" if count > thresh else "black")

        plt.tight_layout()
        plt.savefig(image_path)
    finally:
        # Without closing, every call would leave one more open figure behind.
        plt.close(fig)

    # Read the pixels now: Image.open() alone is lazy, and the next call with
    # the same title overwrites this file.
    image = Image.open(image_path)
    image.load()
    return image

def identical_train_test_split(output_emb, output_raw, labels, percentage_idx):
    """Split embeddings, raw data and labels with one shared random permutation.

    The training share is ``percentage_values_los[percentage_idx]`` percent of
    the samples, rounded down; the remaining samples form the test set.

    Args:
        output_emb: Tensor of embeddings; its first dimension gives the number
            of samples.
        output_raw: Tensor of raw representations aligned with ``output_emb``.
        labels: Labels aligned with ``output_emb``; a tensor or a NumPy
            array-like.
        percentage_idx: Index into ``percentage_values_los``.

    Returns:
        ``(train_emb, test_emb, train_raw, test_raw, train_labels, test_labels)``.
    """
    num_samples = output_emb.shape[0]

    # One permutation is used for all three inputs so the splits stay aligned.
    indices = torch.randperm(num_samples)

    split_index = int(num_samples * percentage_values_los[percentage_idx]/100)
    print(f'Training Size: {split_index}')

    train_indices = indices[:split_index]  # selected share for training
    test_indices = indices[split_index:]   # everything else for testing

    def _select_labels(selection):
        if isinstance(labels, torch.Tensor):
            return labels[selection]
        # Index NumPy data with a NumPy index: a one-element torch tensor would
        # be interpreted as a scalar index and drop the sample dimension.
        return np.asarray(labels)[selection.numpy()]

    train_emb = output_emb[train_indices]
    test_emb = output_emb[test_indices]

    train_raw = output_raw[train_indices]
    test_raw = output_raw[test_indices]

    train_labels = _select_labels(train_indices)
    test_labels = _select_labels(test_indices)

    return train_emb, test_emb, train_raw, test_raw, train_labels, test_labels

# Store the working directory in effect when this module is loaded.
# process_hdf5_file changes into the cloned model repository while it runs and
# switches back to this directory when it finishes.
original_dir = os.getcwd()

def process_hdf5_file(uploaded_file, percentage_idx):
    """Run the LoS/NLoS classification demo on an uploaded HDF5 dataset.

    The model repository is cloned into ``./LWM`` if that folder is missing,
    its ``lwm_model.py``, ``input_preprocess.py`` and ``inference.py`` are
    loaded, and the uploaded channels are converted to embeddings and to raw
    representations. Both are split with the same random permutation,
    classified by nearest class centroid and summarised as confusion matrices.
    Everything printed while processing is captured and returned.

    Args:
        uploaded_file: The uploaded dataset, either an object exposing the file
            path as ``.name`` or the path itself. The file must contain the
            HDF5 datasets ``'channels'`` and ``'labels'``.
        percentage_idx: Index into ``percentage_values_los`` selecting the
            training share.

    Returns:
        A ``(raw_image, embeddings_image, console_output)`` tuple. On failure
        both images are ``None`` and ``console_output`` ends with the error
        message.
    """
    capture = PrintCapture()
    previous_stdout = sys.stdout  # restored afterwards, whatever it was
    sys.stdout = capture          # redirect print statements to the capture

    try:
        model_repo_url = "https://huggingface.co/sadjadalikhani/LWM"
        model_repo_dir = "./LWM"

        # Step 1: Clone the repository if not already done
        if not os.path.exists(model_repo_dir):
            print(f"Cloning model repository from {model_repo_url}...")
            subprocess.run(["git", "clone", model_repo_url, model_repo_dir], check=True)

        # Step 2: Verify the repository was cloned and change the working directory
        repo_work_dir = os.path.join(original_dir, model_repo_dir)
        if not os.path.exists(repo_work_dir):
            print(f"Directory {repo_work_dir} does not exist.")
            # Keep the three-value contract so callers can always unpack.
            return None, None, capture.get_output()

        # NOTE: the working directory is process-wide state; it is reset in
        # the finally block below.
        os.chdir(repo_work_dir)
        print(f"Changed working directory to {os.getcwd()}")
        print(f"Directory content: {os.listdir(os.getcwd())}")  # Debugging: check repo content

        # Step 3: Dynamically load lwm_model.py, input_preprocess.py, and inference.py
        # NOTE: this executes Python code taken from the cloned repository.
        lwm_model_path = os.path.join(os.getcwd(), 'lwm_model.py')
        input_preprocess_path = os.path.join(os.getcwd(), 'input_preprocess.py')
        inference_path = os.path.join(os.getcwd(), 'inference.py')

        lwm_model = load_module_from_path("lwm_model", lwm_model_path)
        input_preprocess = load_module_from_path("input_preprocess", input_preprocess_path)
        inference = load_module_from_path("inference", inference_path)

        # Step 4: Load the model from the lwm_model module
        device = 'cuda' if torch.cuda.is_available() else 'cpu'
        print(f"Loading the LWM model on {device}...")
        model = lwm_model.LWM.from_pretrained(device=device).to(torch.float32)

        # Step 5: Load the HDF5 file and extract the channels and labels
        # Accept both an upload object carrying the path in .name and a plain path.
        dataset_path = getattr(uploaded_file, "name", uploaded_file)
        with h5py.File(dataset_path, 'r') as f:
            channels = np.array(f['channels'])  # Assuming 'channels' dataset in the HDF5 file
            labels = np.array(f['labels'])  # Assuming 'labels' dataset in the HDF5 file
        print(f"Loaded dataset with {channels.shape[0]} samples.")

        # Step 6: Tokenize the data using the tokenizer from input_preprocess
        preprocessed_chs = input_preprocess.tokenizer(manual_data=channels)

        # Step 7: Perform inference using the functions from inference.py
        output_emb = inference.lwm_inference(preprocessed_chs, 'cls_emb', model)
        output_raw = inference.create_raw_dataset(preprocessed_chs, device)

        print(f"Output Embeddings Shape: {output_emb.shape}")
        print(f"Output Raw Shape: {output_raw.shape}")

        print(f'percentage_idx: {percentage_idx}')
        print(f'percentage_value: {percentage_values_los[percentage_idx]}')

        # Flatten every sample to a vector and split both representations identically
        (train_data_emb, test_data_emb,
         train_data_raw, test_data_raw,
         train_labels, test_labels) = identical_train_test_split(
            output_emb.view(len(output_emb), -1),
            output_raw.view(len(output_raw), -1),
            labels,
            percentage_idx,
        )

        # Step 8: Perform classification using the Euclidean distance for both raw and embeddings
        print(f'train_data_emb: {train_data_emb.shape}')
        print(f'train_labels: {train_labels.shape}')
        print(f'test_data_emb: {test_data_emb.shape}')
        pred_raw = classify_based_on_distance(train_data_raw, train_labels, test_data_raw)
        pred_emb = classify_based_on_distance(train_data_emb, train_labels, test_data_emb)

        # Step 9: Generate confusion matrices for both raw and embeddings
        raw_cm_image = plot_confusion_matrix(test_labels, pred_raw, title="Confusion Matrix (Raw Channels)")
        emb_cm_image = plot_confusion_matrix(test_labels, pred_emb, title="Confusion Matrix (Embeddings)")

        return raw_cm_image, emb_cm_image, capture.get_output()

    except Exception as e:
        # Report the failure through the console output; the first two values
        # feed image outputs and must not carry text.
        print(f"Error: {e}")
        return None, None, capture.get_output()

    finally:
        # Always return to the original working directory after processing
        os.chdir(original_dir)
        sys.stdout = previous_stdout  # Undo the print redirection

######################## Define the Gradio interface ###############################
with gr.Blocks(css="""
    .slider-container {
        display: inline-block;
        margin-right: 50px;
        text-align: center;
    }
""") as demo:

    # Tab for Beam Prediction Task
    with gr.Tab("Beam Prediction Task"):
        gr.Markdown("### Beam Prediction Task")

        with gr.Row():
            with gr.Column():
                data_percentage_slider = gr.Slider(label="Data Percentage for Training", minimum=10, maximum=100, step=10, value=10)
                task_complexity_dropdown = gr.Dropdown(label="Task Complexity (Number of Beams)", choices=[16, 32, 64, 128, 256], value=16)

        with gr.Row():
            raw_img_bp = gr.Image(label="Raw Channels", type="pil", width=300, height=300)
            embeddings_img_bp = gr.Image(label="Embeddings", type="pil", width=300, height=300)

        # Redraw the confusion matrices whenever either control changes
        beam_inputs = [data_percentage_slider, task_complexity_dropdown]
        beam_outputs = [raw_img_bp, embeddings_img_bp]
        data_percentage_slider.change(fn=beam_prediction_task, inputs=beam_inputs, outputs=beam_outputs)
        task_complexity_dropdown.change(fn=beam_prediction_task, inputs=beam_inputs, outputs=beam_outputs)

    # Separate Tab for LoS/NLoS Classification Task
    with gr.Tab("LoS/NLoS Classification Task"):
        gr.Markdown("### LoS/NLoS Classification Task")

        # Radio button for user choice: predefined data or upload dataset
        choice_radio = gr.Radio(choices=["Use Default Dataset", "Upload Dataset"], label="Choose how to proceed", value="Use Default Dataset")

        # Dropdown for the training percentage; its values are the indices
        # 0..19 into percentage_values_los, not the percentages themselves.
        percentage_dropdown_los = gr.Dropdown(choices=list(range(20)), value=0, label="Percentage of Data for Training")

        # File uploader for dataset (only visible if user chooses to upload a dataset)
        file_input = gr.File(label="Upload HDF5 Dataset", file_types=[".h5"], visible=False)

        # Confusion matrices display
        with gr.Row():
            raw_img_los = gr.Image(label="Raw Channels", type="pil", width=300, height=300)
            embeddings_img_los = gr.Image(label="Embeddings", type="pil", width=300, height=300)
            output_textbox = gr.Textbox(label="Console Output", lines=10)

        # Update the file uploader visibility based on user choice
        def toggle_file_input(choice):
            return gr.update(visible=(choice == "Upload Dataset"))

        choice_radio.change(fn=toggle_file_input, inputs=[choice_radio], outputs=file_input)

        los_inputs = [choice_radio, percentage_dropdown_los, file_input]
        los_outputs = [raw_img_los, embeddings_img_los, output_textbox]

        # When user makes a choice, update the display
        choice_radio.change(fn=handle_user_choice, inputs=los_inputs, outputs=los_outputs)

        # When the percentage dropdown changes
        percentage_dropdown_los.change(fn=handle_user_choice, inputs=los_inputs, outputs=los_outputs)

        # When a dataset is uploaded or removed; without this an upload would
        # not be processed until one of the other controls changes.
        file_input.change(fn=handle_user_choice, inputs=los_inputs, outputs=los_outputs)

# Launch the app
if __name__ == "__main__":
    demo.launch()