# !pip install gradio
import colorsys
import copy
import os
import uuid
from contextlib import contextmanager
from functools import partial

import gradio as gr  # may require session restart
import IPython
import matplotlib.pyplot as plt  # Visualize the simulated data
import numpy as np
import plotly.graph_objs as go
import torch
import torch.nn as nn
import torch.optim as optim
from graphviz import Digraph
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, TensorDataset

NETWORK_ORIENTAION = 'h'  # 'h' for horizontal 'v' for vertical
TEMP_DIR = "/content/temp"
# exist_ok avoids the check-then-create race of os.path.exists + makedirs.
os.makedirs(TEMP_DIR, exist_ok=True)

"""## functions"""


# @title generate data
def simulate_clusters(noise=0.3, data_points=1000):
    """Simulate a four-cluster XOR-style dataset and return train/test tensors.

    Four Gaussian clusters are centred at (-1, -1), (-1, 1), (1, -1) and
    (1, 1), each with covariance ``noise * I``. The diagonal clusters
    (-1, -1) and (1, 1) get label 0; the other two get label 1. The data is
    split 80/20 (``random_state=42``) and standardised with a scaler fitted on
    the training split only.

    Args:
        noise: Scalar multiplying the 2x2 identity to form each cluster's
            covariance matrix.
        data_points: Total number of samples; must be divisible by 4 so every
            cluster receives the same number of points.

    Returns:
        ``(X_train, y_train, X_test, y_test)`` where the feature tensors are
        ``float32`` and the label tensors are ``long``.

    Raises:
        AssertionError: If ``data_points`` is not divisible by 4.
    """
    if data_points % 4 != 0:
        # Raised explicitly so the check is not stripped under ``python -O``;
        # the exception type and message match the former ``assert``.
        raise AssertionError('Data points should be dived by 4')

    # Set random seed for reproducibility (note: reseeds NumPy's global RNG).
    np.random.seed(0)

    per_cluster = data_points // 4
    means = [(-1, -1), (-1, 1), (1, -1), (1, 1)]
    cov = np.eye(2) * noise  # Small covariance for tight clusters

    # Draw the clusters in order so the sample stream matches the seed.
    X = np.vstack([
        np.random.multivariate_normal(mean, cov, per_cluster)
        for mean in means
    ])
    # Clusters 0 and 3 (the diagonal) -> label 0, clusters 1 and 2 -> label 1.
    y_adjusted = np.repeat([0, 1, 1, 0], per_cluster)

    X_train_adj, X_test_adj, y_train_adj, y_test_adj = train_test_split(
        X, y_adjusted, test_size=0.2, random_state=42
    )

    # Fit the scaler on the training split only, then reuse it for the test split.
    scaler_adj = StandardScaler()
    X_train_scaled_adj = scaler_adj.fit_transform(X_train_adj)
    X_test_scaled_adj = scaler_adj.transform(X_test_adj)

    X_train_tensor_adj = torch.tensor(X_train_scaled_adj, dtype=torch.float32)
    y_train_tensor_adj = torch.tensor(y_train_adj, dtype=torch.long)
    X_test_tensor_adj = torch.tensor(X_test_scaled_adj, dtype=torch.float32)
    y_test_tensor_adj = torch.tensor(y_test_adj, dtype=torch.long)
    return X_train_tensor_adj, y_train_tensor_adj, X_test_tensor_adj, y_test_tensor_adj
# @title plotting network with activation
def get_color(activation, base_color=False):
    """Map an activation level to a colour string for a Graphviz node.

    Args:
        activation: Activation level (number or 0-dim tensor). Values outside
            ``[0, 1]`` are clamped, because unbounded activations would
            otherwise yield malformed colour strings (negative or
            more-than-two-digit hex channels).
        base_color: ``'#RRGGBB'`` string to fade towards white as the
            activation drops, or a falsy value to use the blue/gray scheme.

    Returns:
        ``'#rrggbb'`` blended between white (activation 0) and ``base_color``
        (activation 1) when ``base_color`` is given; otherwise
        ``'#0000FFAA'`` (blue with activation-scaled alpha) for positive
        activations and ``'#E0E0E0'`` for non-positive ones.
    """
    level = min(max(float(activation), 0.0), 1.0)

    if base_color:
        # Convert base colour from hex to RGB channels.
        channels = [int(base_color[start:start + 2], 16) for start in (1, 3, 5)]
        # Interpolate each channel between white (level 0) and the base colour (level 1).
        blended = [int(channel + (255 - channel) * (1 - level)) for channel in channels]
        return '#' + ''.join(f'{value:02x}' for value in blended)

    if level > 0:
        return f"#0000FF{int(level * 255):02X}"  # Blue with varying intensity
    return "#E0E0E0"  # Light gray for inactive neurons


def rd(activation):
    """Return a newline-prefixed two-decimal label for a tensor activation.

    The value ``1`` is the placeholder used when no real activations are
    available, so it produces an empty label.
    """
    if activation != 1:
        return "\n" + "{:.2f}".format(torch.round(activation, decimals=2).item())
    return ''


def softmax(x):
    """Return the softmax of tensor ``x`` along axis 0.

    If any element equals the placeholder value ``1``, ``x`` is returned
    unchanged. The maximum is subtracted before exponentiation so large
    logits do not overflow to ``inf`` (and then ``nan``); the result is
    mathematically unchanged.
    """
    if not all(x != 1):
        return x
    shift = torch.max(x) if x.numel() else 0
    exps = torch.exp(x - shift)
    return exps / torch.sum(exps, axis=0)
def visualize_network_with_weights(model, activations=False, norm='net', decision_boundary_images=None, width=1, height=1):
    """Draw the network as a Graphviz graph with weight-scaled, sign-coloured edges.

    Args:
        model: Module whose ``nn.Linear`` children (in ``named_children()``
            order) define the layers. The last Linear layer is drawn as the
            output layer and is indexed at positions 0 and 1, so it must have
            at least two neurons.
        activations: Mapping of layer name to per-neuron activations. When
            falsy, every neuron gets the placeholder activation ``1``.
        norm: ``'layer'`` scales edge widths by each layer's own largest
            absolute weight; any other value scales by the largest absolute
            weight across all layers.
        decision_boundary_images: Optional mapping of layer name to a list of
            image paths; a hidden neuron with an image is drawn as an image box.
        width: Node width passed to Graphviz for image nodes.
        height: Node height passed to Graphviz for image nodes.

    Returns:
        The populated ``graphviz.Digraph``.

    Raises:
        ValueError: If ``model`` has no ``nn.Linear`` children.
    """
    dot = Digraph()
    if NETWORK_ORIENTAION == 'h':
        dot.attr(rankdir='LR')

    pos_color = "blue"
    neg_color = "orange"
    # Colors for different layers: light green, light grey, light pink, light blue
    input_color, hidden_color, output_color1, output_color2 = '#90EE90', '#D3D3D3', '#FFB6C1', '#ADD8E6'

    # Extract the weight matrix of every Linear layer, keeping model order.
    layers_weights = {
        name: layer.weight.detach().cpu().numpy()
        for name, layer in model.named_children()
        if isinstance(layer, torch.nn.Linear)
    }
    if not layers_weights:
        raise ValueError("model has no nn.Linear layers to visualize")

    layer_names = list(layers_weights)
    output_layer_name = layer_names[-1]
    number_of_layer = len(layer_names)  # derived from the model instead of hard-coded
    max_weight = max(np.abs(weight).max() for weight in layers_weights.values())

    # Initialize activations if not provided
    if not activations:
        activations = {name: [1] * weight.shape[0] for name, weight in layers_weights.items()}

    def normalize(weight):
        """Scale ``weight`` by the chosen peak, leaving all-zero weights untouched."""
        peak = np.abs(weight).max() if norm == 'layer' else max_weight
        # A zero peak would give 0/0 = NaN pen widths.
        return weight / peak if peak else weight

    # Normalize weights for visualization purposes
    layers_weights_norm = {name: normalize(weight) for name, weight in layers_weights.items()}

    def add_node_with_border(node_id, label, base_color, activation, image_path=None, shape='circle', border_color='black', border_width=1):
        """Add one filled node, drawn as an image box when ``image_path`` is given."""
        fill_color = get_color(activation, base_color)
        if image_path:
            dot.node(node_id, label, shape='box', style='filled', fillcolor=fill_color, color=border_color, penwidth=str(border_width), imagescale='both', width=str(width), height=str(height), image=image_path, fixedsize='true')
        else:
            dot.node(node_id, label, shape=shape, style='filled', fillcolor=fill_color, color=border_color, penwidth=str(border_width))

    # Input nodes are always 'active'. Axis names exist only for the first two inputs.
    axis_names = ['X', 'Y']
    input_size = layers_weights[layer_names[0]].shape[1]
    for i in range(input_size):
        label = f'X{i} - {axis_names[i]} Axis' if i < len(axis_names) else f'X{i}'
        add_node_with_border(f'h0_{i}', label, input_color, 1.0)

    # Hidden nodes: every Linear layer except the last one.
    for layer_i, layer_name in enumerate(layer_names[:-1], start=1):
        if decision_boundary_images and layer_name in decision_boundary_images:
            images = decision_boundary_images[layer_name]
        else:
            images = ()
        for i, activation in enumerate(activations[layer_name]):
            image_path = images[i] if len(images) > i else None
            add_node_with_border(f'h{layer_i}_{i}', f'H{layer_i}_{i}{rd(activation)}', hidden_color, activation, image_path=image_path)

    # Output nodes show softmax-normalised activations of the first two output neurons.
    norm_output_activations = softmax(torch.tensor([activations[output_layer_name][0], activations[output_layer_name][1]]))
    activation_label1, activation_label2 = norm_output_activations
    add_node_with_border(f'h{number_of_layer}_0', f"Y0 - Label 0{rd(activation_label1)}", output_color1, activation_label1, shape='doublecircle')
    add_node_with_border(f'h{number_of_layer}_1', f"Y1 - Label 1{rd(activation_label2)}", output_color2, activation_label2, shape='doublecircle')

    # Adding edges between layers: width encodes normalised magnitude, colour encodes sign.
    prev_layer_size = input_size
    prev_layer_name = 'h0'
    for layer_idx, (layer_name, weight_matrix) in enumerate(layers_weights.items(), start=1):
        current_layer_size = weight_matrix.shape[0]
        scaled = layers_weights_norm[layer_name]
        for i in range(prev_layer_size):
            for j in range(current_layer_size):
                color = pos_color if weight_matrix[j, i] >= 0 else neg_color
                dot.edge(f'{prev_layer_name}_{i}', f'h{layer_idx}_{j}', penwidth=str(abs(scaled[j, i]) * 5), color=color)
        prev_layer_size = current_layer_size
        prev_layer_name = f'h{layer_idx}'

    return dot
# @title Plots (learning curve and decision boundary)
def plot_decision_boundary(model, X_train, y_train, X_test, y_test, show=True, epoch=''):
    """Plot the model's predicted class regions together with the train/test points.

    Args:
        model: Classifier called on an ``(N, 2)`` float tensor; it must expose
            a ``device`` attribute and return per-class scores (``argmax(1)``
            is taken as the prediction). It is switched to eval mode.
        X_train, X_test: 2-column feature arrays/tensors.
        y_train, y_test: Label arrays/tensors with values 0 and 1.
        show: When true, display the figure before returning it.
        epoch: Suffix appended to the plot title; any value is converted to text.

    Returns:
        The Plotly figure containing the contour and the four scatter traces.
    """
    # Set model to evaluation mode
    model.eval()

    def padded_range(column):
        """Return (min - 1, max + 1) over both splits as plain floats."""
        low = min(X_train[:, column].min(), X_test[:, column].min())
        high = max(X_train[:, column].max(), X_test[:, column].max())
        return float(low) - 1, float(high) + 1

    # Set min and max values and give it some padding
    x_min, x_max = padded_range(0)
    y_min, y_max = padded_range(1)
    h = 0.01

    # Generate a grid of points with distance h between them; the axes are
    # reused for the contour so the grid and the plot cannot drift apart.
    x_axis = np.arange(x_min, x_max, h)
    y_axis = np.arange(y_min, y_max, h)
    xx, yy = np.meshgrid(x_axis, y_axis)

    # Flatten the grid so the values match expected input
    grid = np.c_[xx.ravel(), yy.ravel()]
    grid_tensor = torch.tensor(grid, dtype=torch.float32)
    with torch.no_grad():
        predictions = model(grid_tensor.to(model.device)).argmax(1).to('cpu')
    Z = predictions.numpy().reshape(xx.shape)

    # Create the contour plot
    contour = go.Contour(
        x=x_axis,
        y=y_axis,
        z=Z,
        colorscale='RdYlBu',  # Light colors for background
        showscale=False,  # Hide the colorbar
    )

    def scatter(points, name, color, **marker_options):
        """Build one black-outlined marker trace for a subset of points."""
        marker = dict(color=color, line=dict(color='black', width=1), **marker_options)
        return go.Scatter(x=points[:, 0], y=points[:, 1], mode='markers', marker=marker, name=name)

    # Separate data based on labels and create one trace per category
    train_0_scatter = scatter(X_train[y_train == 0], 'Train - Label 0', 'red')
    train_1_scatter = scatter(X_train[y_train == 1], 'Train - Label 1', 'green')
    test_0_scatter = scatter(X_test[y_test == 0], 'Test - Label 0', 'rgba(255, 200, 200, 1)', symbol='circle-open')
    test_1_scatter = scatter(X_test[y_test == 1], 'Test - Label 1', 'rgba(200, 255, 200, 1)', symbol='circle-open')

    # Define the layout; the f-string accepts a non-string epoch such as an int.
    layout = go.Layout(
        title=f'Decision Boundary {epoch}',
        xaxis=dict(title='Feature 1'),
        yaxis=dict(title='Feature 2'),
        showlegend=True,
    )

    # Create the figure and add the contour and scatter plots
    fig = go.Figure(data=[contour, train_0_scatter, train_1_scatter, test_0_scatter, test_1_scatter], layout=layout)

    # Show the plot
    if show:
        fig.show()
    return fig
def generate_learning_curve(loss_hist, loss_val_hist, hidden_units, noise, epochs, lr, metric):
    """Build a Plotly learning-curve figure for training and validation history.

    Args:
        loss_hist: Per-epoch training values (loss or accuracy).
        loss_val_hist: Per-epoch validation values.
        hidden_units, noise, lr: Shown in the plot title only.
        epochs: Number of epochs; the x axis runs from 1 to ``epochs``.
        metric: ``'loss'`` (case-insensitive) labels the curves as Loss; any
            other string labels them as Accuracy.

    Returns:
        The Plotly figure with both traces.
    """
    metric = 'Loss' if metric.lower() == 'loss' else "Accuracy"
    epoch_axis = list(range(1, epochs + 1))

    # Create traces for the training and validation history
    trace_train = go.Scatter(x=epoch_axis, y=loss_hist, mode='lines', name=f'Training {metric}')
    trace_val = go.Scatter(x=epoch_axis, y=loss_val_hist, mode='lines', name=f'Validation {metric}')

    # Layout for the plot
    layout = go.Layout(
        title=f'Learning Curve - Hidden Units: {hidden_units}, Noise: {noise}, Learning Rate: {lr}',
        xaxis=dict(title='Epochs'),
        yaxis=dict(title=metric),
    )

    return go.Figure(data=[trace_train, trace_val], layout=layout)


def save_plot_as_image(fig, remove_axes=True, remove_title=True, remove_colorbar=True, transparent_background=True):
    """
    Saves a Matplotlib figure as an image and returns the path to the image.

    The figure is modified in place according to the ``remove_*`` and
    ``transparent_background`` flags before it is written.

    Args:
        fig (matplotlib.figure.Figure): The Matplotlib figure to save.
        remove_axes (bool): If True, removes the axes from the plot.
        remove_title (bool): If True, removes the title and header from the plot.
        remove_colorbar (bool): If True, removes the colorbar from the plot.
        transparent_background (bool): If True, saves the image with a transparent background.

    Returns:
        str: Path to the saved image file.

    Raises:
        ValueError: If ``fig`` is not a Matplotlib figure.
    """
    # Check if fig is a valid Matplotlib figure
    if not isinstance(fig, plt.Figure):
        raise ValueError("The provided object is not a Matplotlib figure.")

    # Remove axes if requested
    if remove_axes:
        for ax in fig.axes:
            ax.get_xaxis().set_visible(False)
            ax.get_yaxis().set_visible(False)
            ax.set_frame_on(False)

    # Remove title and header if requested
    if remove_title:
        fig.suptitle("")
        for ax in fig.axes:
            ax.title.set_visible(False)

    # Remove colorbar if requested. A colorbar is referenced from the artist
    # it describes, which may be an image or a collection (scatter, contour
    # sets on recent Matplotlib), so both kinds are inspected.
    if remove_colorbar:
        for ax in fig.axes:
            for mappable in [*ax.get_images(), *ax.collections]:
                colorbar = getattr(mappable, 'colorbar', None)
                if colorbar is not None:
                    colorbar.remove()

    # Set transparent background if requested
    if transparent_background:
        fig.patch.set_alpha(0)
        for ax in fig.axes:
            ax.patch.set_alpha(0)

    # Generate a unique filename for the image; make sure the target directory
    # still exists in case it was removed after start-up.
    os.makedirs(TEMP_DIR, exist_ok=True)
    filename = f"plot_{uuid.uuid4()}.png"
    file_path = os.path.join(TEMP_DIR, filename)

    # Save the figure with a transparent background if requested
    fig.savefig(file_path, bbox_inches='tight', pad_inches=0, transparent=transparent_background)
    return file_path
def plot_neuron_decision_boundaries(model, X, step=0.01):
    """Draw, for every neuron of each Linear (or final) layer, its sign regions over the input plane.

    The model's children are applied one after another to a dense mesh that
    covers the range of ``X`` plus a 0.5 margin, so the model is assumed to be
    a plain chain of its ``named_children()``.

    Args:
        model: Module exposing a ``device`` attribute; it is switched to eval mode.
        X: 2-column array or tensor used only to size the mesh.
        step: Mesh spacing along both axes.

    Returns:
        Dict mapping layer name to a list of Matplotlib figures, one per
        neuron. The figures are closed after being shown.
    """
    # Ensure X is a NumPy array
    if isinstance(X, torch.Tensor):
        X = X.cpu().numpy()

    # The mesh spans the data range on each axis plus this margin.
    mesh_border_expansion = 0.5
    x_min, x_max = X[:, 0].min() - mesh_border_expansion, X[:, 0].max() + mesh_border_expansion
    y_min, y_max = X[:, 1].min() - mesh_border_expansion, X[:, 1].max() + mesh_border_expansion
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step), np.arange(y_min, y_max, step))
    mesh_inputs = torch.Tensor(np.c_[xx.ravel(), yy.ravel()])

    model.eval()
    children = list(model.named_children())  # built once instead of on every iteration
    last_layer_name = children[-1][0] if children else None

    figures_dict = {}
    layer_outputs = mesh_inputs
    with torch.no_grad():
        for name, layer in children:
            # Apply the layer
            layer_outputs = layer(layer_outputs.to(model.device))

            # Only Linear layers and the last layer are plotted.
            if not (isinstance(layer, nn.Linear) or name == last_layer_name):
                continue

            # Convert to NumPy for plotting
            outputs_np = layer_outputs.cpu().numpy()
            for neuron_idx in range(outputs_np.shape[1]):
                Z = outputs_np[:, neuron_idx].reshape(xx.shape)
                Z_min, Z_max = Z.min(), Z.max()
                if Z_min == Z_max:
                    # Constant output: contourf needs increasing levels, so
                    # widen the single value into a band.
                    levels = [Z_min - 0.5, Z_max + 0.5]
                elif Z_min < 0 < Z_max:
                    levels = [Z_min, 0, Z_max]  # split at zero: negative vs positive region
                else:
                    levels = [Z_min, Z_max]

                fig, ax = plt.subplots()
                ax.contourf(xx, yy, Z, levels=levels, cmap=plt.cm.RdBu, alpha=0.8)
                plt.show()
                plt.close(fig)

                figures_dict.setdefault(name, []).append(fig)

    return figures_dict
# @title network architecture and training
# Module-level state holding the per-epoch model snapshots and the dataset;
# populated by the training routine.
fc_model_hist, X_train, y_train, X_test, y_test = None, None, None, None, None


class FCNet(nn.Module):
    """Fully connected 2 -> hidden -> hidden -> 2 classifier with ReLU activations.

    The ``device`` attribute records the device last passed to the
    constructor or to ``to``; the constructor itself does not move parameters.
    """

    def __init__(self, hidden_units, device):
        super().__init__()
        self.fc1 = nn.Linear(2, hidden_units)  # Input layer with 2 features
        # Each ReLU is declared as its own child module because plotting code
        # walks model.named_children() and would otherwise not see it.
        self.act_func1 = nn.ReLU()
        self.fc2 = nn.Linear(hidden_units, hidden_units)
        self.act_func2 = nn.ReLU()
        self.fc3 = nn.Linear(hidden_units, 2)  # Output layer with 2 neurons (for 2 classes)
        self.device = device

    def forward(self, x):
        """Return the raw class scores for ``x``."""
        x = self.act_func1(self.fc1(x))
        x = self.act_func2(self.fc2(x))
        return self.fc3(x)

    def forward_with_activation(self, x):
        """Return the class scores plus the intermediate activations.

        Returns:
            ``(output, activations)`` where ``output`` equals ``forward(x)``
            and ``activations`` maps ``'inputs'`` to ``x``, ``'fc1'`` and
            ``'fc2'`` to the post-ReLU hidden activations and ``'fc3'`` to
            the raw output scores.
        """
        x1 = self.act_func1(self.fc1(x))
        x2 = self.act_func2(self.fc2(x1))
        x3 = self.fc3(x2)
        # The first element is the network output (previously the unchanged
        # input was returned by mistake).
        return x3, {'inputs': x, 'fc1': x1, 'fc2': x2, 'fc3': x3}

    def to(self, device):
        """Move the module to ``device``, remember it, and return ``self``."""
        super().to(device)
        self.device = device
        return self
def init_net_and_train(hidden_units=4, noise=0.2, epochs=30, data_points=1000, lr=0.01, device='cpu', metric='acc'):
    """Simulate the dataset, train a fresh FCNet and return its learning curve.

    Side effects: rebinds the module-level ``X_train``, ``y_train``,
    ``X_test``, ``y_test`` to the new dataset and ``fc_model_hist`` to a list
    holding one CPU copy of the model per epoch (taken after that epoch's
    training step, in eval mode).

    Args:
        hidden_units: Width of both hidden layers.
        noise: Cluster covariance scale passed to ``simulate_clusters``.
        epochs: Number of training epochs.
        data_points: Total number of simulated samples.
        lr: Adam learning rate.
        device: Device the model is trained on.
        metric: ``'loss'`` (case-insensitive) plots the loss history; any
            other value (e.g. the default ``'acc'``) plots accuracy. This is
            the same rule ``generate_learning_curve`` uses for its labels, so
            curve values and axis label always agree.

    Returns:
        The Plotly figure produced by ``generate_learning_curve``.
    """
    global fc_model_hist, X_train, y_train, X_test, y_test

    # Simulate the dataset
    X_train, y_train, X_test, y_test = simulate_clusters(noise, data_points)

    # Create TensorDataset and DataLoader
    train_loader_adj = DataLoader(TensorDataset(X_train, y_train), batch_size=64, shuffle=True)
    # Evaluation metrics do not depend on sample order, so no shuffling is needed.
    test_loader_adj = DataLoader(TensorDataset(X_test, y_test), batch_size=64, shuffle=False)

    # Initialize the simple fully connected neural network
    fc_model = FCNet(hidden_units, device=device)
    fc_model.to(device)

    # Loss and optimizer for the FC network
    fc_criterion = nn.CrossEntropyLoss()
    fc_optimizer = optim.Adam(fc_model.parameters(), lr=lr)

    fc_model_hist = []
    loss_hist, loss_val_hist = [], []
    acc_hist, acc_val_hist = [], []
    device = fc_model.device

    for _ in range(epochs):
        # ---- training pass ----
        fc_model.train()
        cur_epoch_loss = 0
        correct_train = 0
        total_train = 0
        for inputs, labels in train_loader_adj:
            inputs, labels = inputs.to(device), labels.to(device)

            fc_optimizer.zero_grad()
            outputs = fc_model(inputs)
            loss = fc_criterion(outputs, labels)
            loss.backward()
            fc_optimizer.step()

            # The criterion returns a batch mean; weight it by batch size so
            # the epoch value is a true per-sample average.
            cur_epoch_loss += loss.item() * inputs.size(0)
            total_train += labels.size(0)
            correct_train += (outputs.argmax(dim=1) == labels).sum().item()

        loss_hist.append(cur_epoch_loss / total_train)
        acc_hist.append(correct_train / total_train)

        # ---- snapshot + validation pass ----
        fc_model.eval()
        fc_model_hist.append(copy.deepcopy(fc_model).to('cpu'))

        cur_epoch_loss = 0
        correct_test = 0
        total_test = 0
        with torch.no_grad():
            for inputs, labels in test_loader_adj:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = fc_model(inputs)
                loss = fc_criterion(outputs, labels)

                cur_epoch_loss += loss.item() * inputs.size(0)
                total_test += labels.size(0)
                correct_test += (outputs.argmax(dim=1) == labels).sum().item()

        loss_val_hist.append(cur_epoch_loss / total_test)
        acc_val_hist.append(correct_test / total_test)

    if metric.lower() == 'loss':
        reported_metric_train, reported_metric_val = loss_hist, loss_val_hist
    else:
        reported_metric_train, reported_metric_val = acc_hist, acc_val_hist

    return generate_learning_curve(reported_metric_train, reported_metric_val, hidden_units, noise, epochs, lr, metric)
generate_learning_curve(reported_metric_train,reported_metric_val,hidden_units,noise,epochs,lr,metric) # @title functions for retriving app images def get_network_with_inputs(epoch, input_x, input_y,output_type = "HTML"): if epoch>len(fc_model_hist): epoch = len(fc_model_hist) with torch.no_grad(): cur_model = fc_model_hist[epoch - 1] out, activations = cur_model.forward_with_activation(torch.tensor([input_x, input_y], dtype=torch.float32,device=cur_model.device)) network_dot = visualize_network_with_weights(cur_model, activations=activations) if output_type=='PNG': cur_path = f'network_with_weights_activation_{epoch}' network_dot.render(cur_path, format='png', cleanup=True) return cur_path + ".png" else: svg_content = network_dot.pipe(format='svg').decode('utf-8') # Create HTML content embedding the SVG html_content = f'