# !pip install gradio

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import TensorDataset, DataLoader
import torch.nn as nn
import torch.optim as optim
import torch
# Visualize the simulated data
import matplotlib.pyplot as plt
import plotly.graph_objs as go
import IPython
import numpy as np
from graphviz import Digraph
import copy
import plotly.graph_objs as go
import torch
import numpy as np
import colorsys
from functools import partial
import gradio as gr  # may require a session restart after installation
import os
import uuid
from contextlib import contextmanager
# Drawing direction of the network graph: 'h' lays the layers out left-to-right,
# any other value (e.g. 'v') keeps Graphviz's default layout.
NETWORK_ORIENTAION = 'h'
# Directory that receives the image files rendered by this script.
TEMP_DIR = "/content/temp"
# exist_ok avoids the check-then-create race of a separate os.path.exists() test.
os.makedirs(TEMP_DIR, exist_ok=True)

"""## functions"""

# @title generate data

def simulate_clusters(noise=0.3,data_points=1000):
    """Generate a standardized, XOR-labelled four-cluster dataset as tensors.

    Four Gaussian clusters are sampled around (-1, -1), (-1, 1), (1, -1) and
    (1, 1), each with covariance ``noise * I`` and ``data_points // 4`` points.
    The two clusters on the main diagonal get label 0, the other two label 1.
    The data is split 80/20 into train/test and standardized with statistics
    fitted on the training split only.

    Args:
        noise: Scale of the identity covariance of every cluster.
        data_points: Total number of samples; must be a multiple of 4.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` where the feature tensors are
        float32 and the label tensors are int64 (``torch.long``).
    """
    assert data_points%4==0, 'Data points should be dived by 4'
    per_cluster = data_points // 4

    # Fixed NumPy seed so every call draws the same samples.
    np.random.seed(0)

    # One Gaussian blob per cluster centre, all sharing the same covariance scale.
    centres = [(-1, -1), (-1, 1), (1, -1), (1, 1)]
    blobs = [
        np.random.multivariate_normal(centre, np.eye(2) * noise, per_cluster)
        for centre in centres
    ]
    features = np.vstack(blobs)

    # Clusters are stacked in the order of `centres`; the first and last
    # (main diagonal) are class 0, the two in between are class 1.
    targets = np.repeat([0, 1, 1, 0], per_cluster)

    train_x, test_x, train_y, test_y = train_test_split(
        features, targets, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler = StandardScaler()
    train_x = scaler.fit_transform(train_x)
    test_x = scaler.transform(test_x)

    return (
        torch.tensor(train_x, dtype=torch.float32),
        torch.tensor(train_y, dtype=torch.long),
        torch.tensor(test_x, dtype=torch.float32),
        torch.tensor(test_y, dtype=torch.long),
    )

# @title plotting network with activation
def get_color(activation, base_color=False):
    """Map an activation value to a hex color string for a Graphviz node.

    Args:
        activation: Scalar activation (number or 0-d tensor). Values outside
            [0, 1] are clamped so the result is always a well-formed color;
            un-normalized ReLU outputs can easily exceed 1.
        base_color: ``'#RRGGBB'`` string, or a falsy value. With a base color
            the result fades from white (activation 0) to the base color
            (activation 1). Without one, a positive activation yields blue
            whose alpha channel encodes the activation, and a non-positive
            activation yields light gray.

    Returns:
        ``'#rrggbb'`` when ``base_color`` is given, otherwise ``'#0000FFAA'``
        (blue with alpha) or ``'#E0E0E0'``.
    """
    level = float(activation)

    if base_color:
        # Clamp so every channel stays within 0..255 (two hex digits, no sign).
        level = min(max(level, 0.0), 1.0)
        base_channels = [int(base_color[i:i + 2], 16) for i in (1, 3, 5)]
        # Linear interpolation between white (level 0) and the base color (level 1).
        blended = [int(c + (255 - c) * (1 - level)) for c in base_channels]
        return '#{:02x}{:02x}{:02x}'.format(*blended)

    if level > 0:
        # Cap at 1 so the alpha channel never needs more than two hex digits.
        return f"#0000FF{int(min(level, 1.0) * 255):02X}"  # Blue with varying intensity
    return "#E0E0E0"  # Light gray for inactive neurons


def rd(activation):
    """Format an activation as a node-label suffix.

    Returns an empty string when the activation equals 1 (the placeholder used
    when no real activations are available); otherwise a newline followed by
    the value rounded to two decimals. Non-placeholder values must be tensors.
    """
    if activation == 1:
        return ''
    return "\n" + "{:.2f}".format(torch.round(activation, decimals=2).item())


#sigmoid = lambda x: 1 / (1 + torch.exp(-x)) if x!=1 else 1
def softmax(x):
    """Softmax over a 1-D tensor, skipped when any entry equals 1.

    If at least one element equals 1 the input is returned unchanged (this is
    how the all-ones placeholder activations pass through). The maximum is
    subtracted before exponentiating so large logits do not overflow to
    inf/NaN; the result is mathematically the same.
    """
    if not all(x != 1):
        return x
    exps = torch.exp(x - torch.max(x))
    return exps / torch.sum(exps, axis=0)
def visualize_network_with_weights(model, activations=False, norm='net', decision_boundary_images=None, width=1, height=1):
    """Draw the model's Linear layers as a Graphviz graph with weighted edges.

    A column of input nodes is followed by one column of nodes per
    ``torch.nn.Linear`` child of ``model`` (in declaration order). The last
    Linear layer is drawn as two label nodes whose values go through
    ``softmax``. Edge width is proportional to the absolute weight; edges are
    blue for non-negative weights and orange for negative ones.

    The number of layers and their names are taken from the model itself, so
    nodes and edges always agree. The input labels use two axis names and the
    output uses two label nodes, so the first Linear layer is expected to take
    at most 2 features and the last one to produce at least 2 outputs.

    Args:
        model: Module whose direct children include the Linear layers to draw.
        activations: Mapping from Linear layer name to per-neuron activations.
            When falsy, every neuron gets the placeholder activation 1 (full
            base color, no numeric label).
        norm: ``'layer'`` scales edge widths by each layer's own largest
            absolute weight; any other value scales by the largest absolute
            weight over all layers.
        decision_boundary_images: Optional mapping from layer name to a list
            of image paths; hidden neurons with an entry are drawn as boxes
            showing that image.
        width: Node width passed to Graphviz for image nodes.
        height: Node height passed to Graphviz for image nodes.

    Returns:
        The populated ``Digraph``.

    Raises:
        ValueError: If ``model`` has no ``torch.nn.Linear`` children.
    """
    dot = Digraph()
    if NETWORK_ORIENTAION=='h':
        dot.attr(rankdir='LR')
    pos_color = "blue"
    neg_color = "orange"
    # Node fill colors: inputs light green, hidden light grey, outputs light red / light blue.
    input_color, hidden_color, output_color1, output_color2 = '#90EE90', '#D3D3D3', '#FFB6C1', '#ADD8E6'

    # Extract weights for each Linear layer and track the largest absolute weight.
    layers_weights = {}
    max_weight = 0
    for name, layer in model.named_children():
        if isinstance(layer, torch.nn.Linear):
            layer_weight = layer.weight.cpu().data.numpy()
            layers_weights[name] = layer_weight
            max_weight = max(max_weight, np.abs(layer_weight).max())
    if not layers_weights:
        raise ValueError("model has no torch.nn.Linear children to visualize")

    # Layer structure comes from the model, so node ids match the edge loop below.
    layer_names = list(layers_weights)
    number_of_layer = len(layer_names)
    output_layer_name = layer_names[-1]

    # Initialize activations if not provided
    if not activations:
        activations = {layer: [1] * weight.shape[0] for layer, weight in layers_weights.items()}

    def weight_scale(weight):
        # Fall back to 1 for all-zero weights so edge widths never become NaN.
        largest = np.abs(weight).max() if norm == 'layer' else max_weight
        return largest if largest > 0 else 1.0

    # Normalize weights for visualization purposes
    layers_weights_norm = {layer: weight / weight_scale(weight)
                           for layer, weight in layers_weights.items()}

    def add_node_with_border(node_id, label, base_color, activation, image_path=None, shape='circle', border_color='black', border_width=1):
        fill_color = get_color(activation, base_color)
        if image_path:
            # Image nodes are fixed-size boxes so the picture fills the node.
            dot.node(node_id, label, shape='box', style='filled', fillcolor=fill_color, color=border_color, penwidth=str(border_width),imagescale='both', width=str(width), height=str(height), image=image_path, fixedsize='true')
        else:
            dot.node(node_id, label, shape=shape, style='filled', fillcolor=fill_color, color=border_color, penwidth=str(border_width))

    # Input nodes: one per input feature of the first Linear layer.
    axis_names = ['X','Y']
    for i in range(layers_weights[layer_names[0]].shape[1]):
        add_node_with_border(f'h0_{i}' , f'X{i} - {axis_names[i]} Axis', input_color, 1.0)  # Input nodes are always 'active'

    # Hidden nodes: every Linear layer except the last one.
    for layer_i, layer_name in enumerate(layer_names[:-1], start=1):
        layer_images = (decision_boundary_images or {}).get(layer_name, [])
        for i, activation in enumerate(activations[layer_name]):
            image_path = layer_images[i] if i < len(layer_images) else None
            add_node_with_border(f'h{layer_i}_{i}', f'H{layer_i}_{i}{rd(activation)}', hidden_color, activation, image_path=image_path)

    # Output nodes: the two logits of the last layer, passed through softmax.
    output_activations = activations[output_layer_name]
    norm_output_activations = softmax(torch.tensor([output_activations[0], output_activations[1]]))
    activation_label1, activation_label2 = norm_output_activations
    add_node_with_border(f'h{number_of_layer}_0', f"Y0 - Label 0{rd(activation_label1)}", output_color1, activation_label1,shape='doublecircle')
    add_node_with_border(f'h{number_of_layer}_1', f"Y1 - Label 1{rd(activation_label2)}", output_color2, activation_label2,shape='doublecircle')

    # Adding edges between layers
    prev_layer_size = layers_weights[layer_names[0]].shape[1]  # Size of the input layer
    prev_layer_name = 'h0'

    for layer_idx, (layer_name, weight_matrix) in enumerate(layers_weights.items(), start=1):
        current_layer_size = weight_matrix.shape[0]

        for i in range(prev_layer_size):
            for j in range(current_layer_size):
                color = pos_color if weight_matrix[j, i] >= 0 else neg_color
                dot.edge(f'{prev_layer_name}_{i}', f'h{layer_idx}_{j}', penwidth=str(abs(layers_weights_norm[layer_name][j, i]) * 5), color=color)

        prev_layer_size = current_layer_size
        prev_layer_name = f'h{layer_idx}'

    return dot

# @title Plots (learning curve and decision boundary)
def plot_decision_boundary(model, X_train, y_train, X_test, y_test, show=True, epoch=''):
    """Plot the model's predicted class regions with the train/test points on top.

    The model is switched to evaluation mode and evaluated on a dense grid
    (spacing 0.01) that covers the data range padded by 1 on every side. The
    predicted class per grid point is drawn as a contour map; train and test
    samples are overlaid as one scatter trace per split and label.

    Args:
        model: Classifier exposing a ``device`` attribute; called on a
            ``(N, 2)`` float tensor and expected to return per-class scores.
        X_train, y_train: Training features and labels.
        X_test, y_test: Test features and labels.
        show: When true, display the figure before returning it.
        epoch: Text appended to the plot title.

    Returns:
        The Plotly figure.
    """
    model.eval()

    grid_step = 0.01

    # Plot range: data extent over both splits, padded by 1 in each direction.
    x_lo = min(X_train[:, 0].min(), X_test[:, 0].min()) - 1
    x_hi = max(X_train[:, 0].max(), X_test[:, 0].max()) + 1
    y_lo = min(X_train[:, 1].min(), X_test[:, 1].min()) - 1
    y_hi = max(X_train[:, 1].max(), X_test[:, 1].max()) + 1

    x_axis = np.arange(x_lo, x_hi, grid_step)
    y_axis = np.arange(y_lo, y_hi, grid_step)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # One row per grid point, in the (x, y) layout the model expects.
    grid_points = torch.FloatTensor(np.column_stack((xx.ravel(), yy.ravel())))
    with torch.no_grad():
        predicted = model(grid_points.to(model.device)).argmax(1).to('cpu')
    class_map = predicted.numpy().reshape(xx.shape)

    background = go.Contour(
        x=x_axis,
        y=y_axis,
        z=class_map,
        colorscale='RdYlBu',
        showscale=False,  # no colorbar
    )

    def labelled_points(features, labels, label_value, name, **marker_style):
        # Scatter trace for the samples of one split that carry `label_value`.
        selected = features[labels == label_value]
        return go.Scatter(
            x=selected[:, 0],
            y=selected[:, 1],
            mode='markers',
            marker=dict(line=dict(color='black', width=1), **marker_style),
            name=name,
        )

    point_traces = [
        labelled_points(X_train, y_train, 0, 'Train - Label 0', color='red'),
        labelled_points(X_train, y_train, 1, 'Train - Label 1', color='green'),
        labelled_points(X_test, y_test, 0, 'Test - Label 0',
                        color='rgba(255, 200, 200, 1)', symbol='circle-open'),
        labelled_points(X_test, y_test, 1, 'Test - Label 1',
                        color='rgba(200, 255, 200, 1)', symbol='circle-open'),
    ]

    layout = go.Layout(
        title='Decision Boundary ' + epoch,
        xaxis=dict(title='Feature 1'),
        yaxis=dict(title='Feature 2'),
        showlegend=True
    )
    fig = go.Figure(data=[background] + point_traces, layout=layout)

    if show:
        fig.show()
    return fig


def generate_learning_curve(loss_hist, loss_val_hist, hidden_units, noise, epochs, lr,metric):
    """Build a Plotly line chart of a per-epoch training and validation metric.

    Args:
        loss_hist: Per-epoch training values (loss or accuracy, see ``metric``).
        loss_val_hist: Per-epoch validation values.
        hidden_units: Shown in the plot title.
        noise: Shown in the plot title.
        epochs: Number of epochs; the x axis runs from 1 to ``epochs``.
        lr: Learning rate, shown in the plot title.
        metric: ``'loss'`` (case-insensitive) labels the curves as Loss; any
            other value labels them as Accuracy.

    Returns:
        The Plotly figure.
    """
    # Gradient tracking is disabled as in the original; the body only builds plot objects.
    with torch.no_grad():
        metric = "Accuracy" if metric.lower() != 'loss' else 'Loss'

        series = (
            (f'Training {metric}', loss_hist),
            (f'Validation {metric}', loss_val_hist),
        )
        data = [
            go.Scatter(x=list(range(1, epochs + 1)), y=values, mode='lines', name=label)
            for label, values in series
        ]

        layout = go.Layout(
            title=f'Learning Curve - Hidden Units: {hidden_units}, Noise: {noise}, Learning Rate: {lr}',
            xaxis=dict(title='Epochs'),
            yaxis=dict(title=metric),
        )

    return go.Figure(data=data, layout=layout)

def save_plot_as_image(fig, remove_axes=True, remove_title=True, remove_colorbar=True, transparent_background=True):
    """
    Saves a Matplotlib figure as a PNG file in TEMP_DIR and returns its path.

    The figure is modified in place according to the ``remove_*`` and
    ``transparent_background`` flags before it is written.

    Args:
    fig (matplotlib.figure.Figure): The Matplotlib figure to save.
    remove_axes (bool): If True, hides both axes and the frame of every subplot.
    remove_title (bool): If True, clears the figure title and hides subplot titles.
    remove_colorbar (bool): If True, removes colorbars attached to the
        collections or images of any subplot.
    transparent_background (bool): If True, saves the image with a transparent background.

    Returns:
    str: Path to the saved image file (a unique name inside TEMP_DIR).

    Raises:
    ValueError: If ``fig`` is not a Matplotlib figure.
    """
    # Check if fig is a valid Matplotlib figure
    if not isinstance(fig, plt.Figure):
        raise ValueError("The provided object is not a Matplotlib figure.")

    # Remove axes if requested
    if remove_axes:
        for ax in fig.axes:
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            ax.set_frame_on(False)

    # Remove title and header if requested
    if remove_title:
        fig.suptitle("")
        for ax in fig.axes:
            ax.title.set_visible(False)

    # Remove colorbar if requested
    if remove_colorbar:
        # Iterate over a snapshot: removing a colorbar also removes its axes from the figure.
        for ax in list(fig.axes):
            # A colorbar hangs off the artist it describes, which may be a
            # collection (scatter, contour sets where listed) or an image.
            # NOTE(review): a colorbar whose mappable is listed in neither
            # ax.collections nor ax.images is not found here.
            for artist in list(ax.collections) + list(ax.get_images()):
                colorbar = getattr(artist, 'colorbar', None)
                if colorbar is not None:
                    colorbar.remove()

    # Set transparent background if requested
    if transparent_background:
        fig.patch.set_alpha(0)
        for ax in fig.axes:
            ax.patch.set_alpha(0)

    # Generate a unique filename for the image
    filename = f"plot_{uuid.uuid4()}.png"
    file_path = os.path.join(TEMP_DIR, filename)

    # The directory may have been removed since import time; recreate it if needed.
    os.makedirs(TEMP_DIR, exist_ok=True)

    # Save the figure with a transparent background if requested
    fig.savefig(file_path, bbox_inches='tight', pad_inches=0, transparent=transparent_background)

    return file_path

def plot_neuron_decision_boundaries(model, X, step=0.01):
    """Draw, for every neuron, where its output is negative vs. positive.

    A dense grid covering the range of ``X`` (padded by 0.5 on every side) is
    pushed through the model's children one after another. After each
    ``nn.Linear`` child (and after the last child, whatever its type) one
    Matplotlib figure per output unit is created: a filled contour plot of
    that unit's output, split at zero when the output changes sign over the
    grid.

    Args:
        model: Module with a ``device`` attribute whose direct children can be
            applied sequentially to a ``(N, 2)`` tensor.
        X: Data (tensor or array) whose first two columns define the grid range.
        step: Grid spacing.

    Returns:
        Dict mapping child name to the list of figures of its units. Each
        figure is shown and then closed before being stored.
    """
    if isinstance(X, torch.Tensor):
        X = X.cpu().numpy()

    # Extra margin added around the data range on each axis.
    margin = 0.5
    x_lo, x_hi = X[:, 0].min() - margin, X[:, 0].max() + margin
    y_lo, y_hi = X[:, 1].min() - margin, X[:, 1].max() + margin
    xx, yy = np.meshgrid(np.arange(x_lo, x_hi, step), np.arange(y_lo, y_hi, step))
    mesh_inputs = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])

    model.eval()
    children = list(model.named_children())
    last_child_name = children[-1][0] if children else None

    figures_dict = {}
    signal = mesh_inputs
    with torch.no_grad():
        for name, layer in children:
            # Propagate the grid through this child.
            signal = layer(signal.to(model.device))

            # Only Linear children and the final child are plotted.
            if not (isinstance(layer, nn.Linear) or name == last_child_name):
                continue

            unit_outputs = signal.cpu().numpy()
            for neuron_idx in range(unit_outputs.shape[1]):
                Z = unit_outputs[:, neuron_idx].reshape(xx.shape)
                Z_min, Z_max = Z.min(), Z.max()

                # Two bands split at zero when the sign changes, otherwise one band.
                if Z_min < 0 < Z_max:
                    levels = [Z_min, 0, Z_max]
                else:
                    levels = [Z_min, Z_max]

                fig, ax = plt.subplots()
                ax.contourf(xx, yy, Z, levels=levels, cmap=plt.cm.RdBu, alpha=0.8)
                plt.show()
                plt.close(fig)
                figures_dict.setdefault(name, []).append(fig)

    return figures_dict


# plot_neuron_decision_boundaries(  fc_model, X_train)

# step=0.01
# x_min, x_max = X_train[:, 0].min() - 1, X_train[:, 0].max() + 1
# y_min, y_max = X_train[:, 1].min() - 1, X_train[:, 1].max() + 1
# xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
# mesh_inputs = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])
# mesh_inputs

# @title network architecture and training

# Global variables to hold model and data
# Shared state filled in by init_net_and_train and read by the plotting helpers:
# the per-epoch model snapshots and the current train/test split.
# (A `global` statement is a no-op at module level, so plain assignment is used.)
fc_model_hist, X_train, y_train, X_test, y_test = None, None, None, None, None

class FCNet(nn.Module):
    """Small fully connected classifier: 2 inputs -> two ReLU hidden layers -> 2 logits.

    The model remembers the device it was last moved to in ``self.device`` so
    helper functions can place their inputs on the same device.
    """

    def __init__(self,hidden_units,device):
        """Create the layers.

        Args:
            hidden_units: Width of both hidden layers.
            device: Device recorded in ``self.device``; the parameters are not
                moved here, call ``to(device)`` for that.
        """
        super().__init__()
        self.fc1 = nn.Linear(2, hidden_units)  # Input layer with 2 features
        # Each ReLU is registered as its own child module: the plotting helpers
        # walk model.named_children() and apply the children in order, so an
        # activation that is not declared here would be skipped by them.
        self.act_func1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.act_func2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_units, 2)  # Output layer with 2 neurons (for 2 classes)
        self.device = device

    def forward(self, x):
        """Return the two class logits for ``x``."""
        x = self.act_func1(self.fc1(x))
        x = self.act_func2(self.fc2(x))
        x = self.fc3(x)
        return x

    def forward_with_activation(self, x):
        """Run a forward pass and also return the intermediate activations.

        Returns:
            ``(logits, activations)`` where ``activations`` maps ``'inputs'``
            to the input, ``'fc1'`` / ``'fc2'`` to the post-ReLU hidden
            activations and ``'fc3'`` to the logits.
        """
        inputs = x
        x1 = self.act_func1(self.fc1(x))
        x2 = self.act_func2(self.fc2(x1))
        x3 = self.fc3(x2)
        # Return the network output (previously the untouched input was returned).
        return x3,{'inputs':inputs,'fc1':x1,'fc2':x2,'fc3':x3}

    def to(self, device):
        """Move the model to ``device`` and record it in ``self.device``."""
        super().to(device)
        self.device = device
        return self

def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one pass over ``loader`` and return ``(mean loss per sample, accuracy)``.

    With an ``optimizer`` the model is updated after every batch; without one
    the pass only measures loss and accuracy.
    """
    summed_loss = 0
    correct = 0
    total = 0
    for inputs, labels in loader:
        inputs, labels = inputs.to(device), labels.to(device)
        if optimizer is not None:
            optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        if optimizer is not None:
            loss.backward()
            optimizer.step()

        # The criterion returns the batch mean; weight it by the batch size so
        # the epoch value is a true per-sample mean even with a short last batch.
        summed_loss += loss.item() * inputs.size(0)
        predicted = outputs.detach().argmax(dim=1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
    return summed_loss / total, correct / total


def init_net_and_train(hidden_units = 4,noise = 0.2,epochs = 30,data_points = 1000,lr=0.01,device='cpu',metric='acc'):
    """Simulate the dataset, train a fresh FCNet and return its learning curve.

    Side effects: the module-level ``X_train``, ``y_train``, ``X_test`` and
    ``y_test`` are replaced by the new split, and ``fc_model_hist`` is reset
    and then filled with one CPU copy of the model per epoch (taken after that
    epoch's training, in evaluation mode).

    Args:
        hidden_units: Width of the two hidden layers.
        noise: Cluster covariance scale passed to ``simulate_clusters``.
        epochs: Number of training epochs.
        data_points: Total number of simulated samples.
        lr: Adam learning rate.
        device: Device to train on.
        metric: ``'acc'`` plots accuracy; any other value plots the loss.

    Returns:
        The figure produced by ``generate_learning_curve``.
    """
    global fc_model_hist, X_train, y_train, X_test, y_test
    # Simulate the dataset
    X_train, y_train, X_test, y_test = simulate_clusters(noise, data_points)

    train_loader_adj = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
    # Evaluation metrics do not depend on sample order, so the test set is not shuffled.
    test_loader_adj = DataLoader(TensorDataset(X_test, y_test), batch_size=64, shuffle=False)

    fc_model = FCNet(hidden_units, device=device)
    fc_model.to(device)
    fc_criterion = nn.CrossEntropyLoss()
    fc_optimizer = optim.Adam(fc_model.parameters(), lr=lr)

    fc_model_hist = []
    loss_hist, loss_val_hist = [], []
    acc_hist, acc_val_hist = [], []

    device = fc_model.device

    for _ in range(epochs):
        fc_model.train()
        train_loss, train_accuracy = _run_epoch(
            fc_model, train_loader_adj, fc_criterion, device, optimizer=fc_optimizer
        )
        loss_hist.append(train_loss)
        acc_hist.append(train_accuracy)

        fc_model.eval()
        # Snapshot of this epoch's model, kept on the CPU for later plotting.
        fc_model_hist.append(copy.deepcopy(fc_model).to('cpu'))

        with torch.no_grad():
            test_loss, test_accuracy = _run_epoch(
                fc_model, test_loader_adj, fc_criterion, device
            )
        loss_val_hist.append(test_loss)
        acc_val_hist.append(test_accuracy)

    if metric=='acc':
        reported_metric_train, reported_metric_val = acc_hist, acc_val_hist
    else:
        reported_metric_train, reported_metric_val = loss_hist, loss_val_hist
    return generate_learning_curve(reported_metric_train,reported_metric_val,hidden_units,noise,epochs,lr,metric)

# @title functions for retriving app images
def get_network_with_inputs(epoch, input_x, input_y,output_type = "HTML"):
    """Render the network of a given epoch with the activations of one input point.

    Args:
        epoch: 1-based epoch whose model snapshot is used; values outside the
            available range are clamped to the first / last snapshot.
        input_x: First input feature.
        input_y: Second input feature.
        output_type: ``'PNG'`` renders a PNG file and returns its path; any
            other value returns an HTML snippet with the graph as inline SVG.

    Returns:
        Path of the rendered PNG, or an HTML string.

    Raises:
        RuntimeError: If no model snapshots exist yet.
    """
    if not fc_model_hist:
        raise RuntimeError("No trained model snapshots are available; run init_net_and_train first.")
    # Clamp on both sides: without the lower bound, epoch <= 0 would silently
    # index the snapshot list from its end.
    epoch = max(1, min(epoch, len(fc_model_hist)))

    with torch.no_grad():
        cur_model = fc_model_hist[epoch - 1]
        point = torch.tensor([input_x, input_y], dtype=torch.float32,device=cur_model.device)
        out, activations = cur_model.forward_with_activation(point)
        network_dot = visualize_network_with_weights(cur_model, activations=activations)

    if output_type=='PNG':
        # NOTE: the path has no directory part, so the file is rendered relative
        # to the current working directory.
        cur_path = f'network_with_weights_activation_{epoch}'
        network_dot.render(cur_path, format='png', cleanup=True)
        return cur_path + ".png"

    svg_content = network_dot.pipe(format='svg').decode('utf-8')
    # Create HTML content embedding the SVG
    html_content = f'<div style="width:100%; height:100%;">{svg_content}</div>'
    return html_content


def get_plots_as_png(des_list):
    """Save each figure in ``des_list`` with ``save_plot_as_image`` and return the file paths in order."""
    saved_paths = []
    for plot in des_list:
        saved_paths.append(save_plot_as_image(plot))
    return saved_paths


# When true, generate_images returns the network as inline SVG wrapped in HTML
# instead of the path of a rendered PNG.
as_HTML=False

def generate_images(epoch,net_with_unit_decisions=True):
    """Produce the decision-boundary figure and the network drawing for an epoch.

    Args:
        epoch: 1-based epoch whose model snapshot is used; values outside the
            available range are clamped to the first / last snapshot.
        net_with_unit_decisions: When true, each hidden neuron of the network
            drawing shows an image of its own decision boundary.

    Returns:
        ``(fig, network)`` where ``fig`` is the decision-boundary figure and
        ``network`` is an HTML string with inline SVG when ``as_HTML`` is
        true, otherwise the path of a PNG rendered into ``TEMP_DIR``.

    Raises:
        RuntimeError: If no model snapshots exist yet.
    """
    if not fc_model_hist:
        raise RuntimeError("No trained model snapshots are available; run init_net_and_train first.")
    # Clamp on both sides: without the lower bound, epoch <= 0 would silently
    # index the snapshot list from its end.
    epoch = max(1, min(epoch, len(fc_model_hist)))
    cur_model = fc_model_hist[epoch - 1]

    fig = plot_decision_boundary(cur_model, X_train, y_train, X_test, y_test, show=False,epoch=f'Epoch:{epoch}')

    if not net_with_unit_decisions:
        network_dot = visualize_network_with_weights(cur_model)
    else:
        # One saved image per neuron, grouped by layer name.
        decision_plots = plot_neuron_decision_boundaries(cur_model, X_train)
        decision_boundary_images = {k:get_plots_as_png(decision_plots[k]) for k in decision_plots}
        network_dot = visualize_network_with_weights(cur_model, activations=False, decision_boundary_images=decision_boundary_images)

    if as_HTML:
        svg_content = network_dot.pipe(format='svg').decode('utf-8')
        network_proccessed = f'<div style="width:100%; height:100%;">{svg_content}</div>'
    else:
        cur_path = f'{TEMP_DIR}/network_with_weights_activation_{epoch}'
        network_dot.render(cur_path, format='png', cleanup=True)
        network_proccessed = cur_path+".png"

    return fig, network_proccessed

@contextmanager
def dummy_context():
    """Context manager that yields ``None`` to the ``with`` block."""
    yield None