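"""Utilities for building, training, and visualizing a small MLP classifier.

Provides synthetic dataset generation and splitting, a configurable PyTorch MLP,
a mini-batch training loop with validation tracking, and matplotlib/networkx
visualizations of metrics, weights, and the network architecture.
"""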
import torch
import torch.nn as nn
import numpy as np
from sklearn.datasets import make_classification
from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt
import networkx as nx

# Supported activations
ACTIVATION_MAP = {
    'ReLU': nn.ReLU(),
    'Tanh': nn.Tanh(),
    'Sigmoid': nn.Sigmoid(),
    'LeakyReLU': nn.LeakyReLU(),
    'Identity': nn.Identity()
}

class MLP(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, activations):
        super(MLP, self).__init__()
        self.layers = nn.ModuleList()
        self.activations = []
        # Input layer
        self.layers.append(nn.Linear(input_size, hidden_sizes[0]))
        self.activations.append(ACTIVATION_MAP[activations[0]])
        # Hidden layers
        for i in range(len(hidden_sizes) - 1):
            self.layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1]))
            self.activations.append(ACTIVATION_MAP[activations[i + 1]])
        # Output layer
        self.layers.append(nn.Linear(hidden_sizes[-1], output_size))
        self.activations.append(ACTIVATION_MAP['Identity'])  # No activation for output
        # Softmax kept for obtaining class probabilities outside the loss computation
        self.softmax = nn.Softmax(dim=1)

    def forward(self, x):
        for i, layer in enumerate(self.layers[:-1]):
            x = self.activations[i](layer(x))
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax internally,
        # so applying self.softmax here would distort the loss and its gradients.
        return self.layers[-1](x)

def generate_dataset(n_samples, n_features, n_classes, random_state=42):
    X, y = make_classification(
        n_samples=n_samples,
        n_features=n_features,
        n_classes=n_classes,
        n_informative=n_features,
        n_redundant=0,
        random_state=random_state
    )
    # Scale the features to zero mean and unit variance
    scaler = StandardScaler()
    X = scaler.fit_transform(X)
    return X, y

def split_data(X, y, val_pct, test_pct, random_state=42):
    np.random.seed(random_state)
    n = X.shape[0]
    idx = np.random.permutation(n)
    n_test = int(n * test_pct)
    n_val = int(n * val_pct)
    n_train = n - n_val - n_test
    train_idx = idx[:n_train]
    val_idx = idx[n_train:n_train + n_val]
    test_idx = idx[n_train + n_val:]
    return (X[train_idx], y[train_idx]), (X[val_idx], y[val_idx]), (X[test_idx], y[test_idx])

def train_model(model, X_train, y_train, X_val, y_val, epochs, learning_rate, batch_size=32, track_weights=False):
    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    X_train_tensor = torch.FloatTensor(X_train)
    y_train_tensor = torch.LongTensor(y_train)
    X_val_tensor = torch.FloatTensor(X_val)
    y_val_tensor = torch.LongTensor(y_val)
    n_samples = X_train.shape[0]
    n_batches = int(np.ceil(n_samples / batch_size))
    train_losses = []
    train_accuracies = []
    val_losses = []
    val_accuracies = []
    weights_history = []
    for epoch in range(epochs):
        # Training phase
        model.train()
        indices = torch.randperm(n_samples)
        X_shuffled = X_train_tensor[indices]
        y_shuffled = y_train_tensor[indices]
        epoch_train_loss = 0
        train_correct = 0
        # Mini-batch training
        for i in range(0, n_samples, batch_size):
            batch_X = X_shuffled[i:i + batch_size]
            batch_y = y_shuffled[i:i + batch_size]
            optimizer.zero_grad()
            outputs = model(batch_X)
            loss = criterion(outputs, batch_y)
            loss.backward()
            optimizer.step()
            epoch_train_loss += loss.item()
            # Accumulate training accuracy
            _, predicted = torch.max(outputs.data, 1)
            train_correct += (predicted == batch_y).sum().item()
        # Average training loss over the actual number of batches
        avg_train_loss = epoch_train_loss / n_batches
        train_accuracy = train_correct / n_samples
        # Validation phase
        model.eval()
        with torch.no_grad():
            val_outputs = model(X_val_tensor)
            val_loss = criterion(val_outputs, y_val_tensor)
            _, val_predicted = torch.max(val_outputs.data, 1)
            val_correct = (val_predicted == y_val_tensor).sum().item()
            val_accuracy = val_correct / len(y_val)
        train_losses.append(avg_train_loss)
        train_accuracies.append(train_accuracy)
        val_losses.append(val_loss.item())
        val_accuracies.append(val_accuracy)
        if track_weights:
            # Snapshot the first layer's weights after each epoch
            weights_history.append(model.layers[0].weight.detach().cpu().numpy().copy())
    return (train_losses, train_accuracies, val_losses, val_accuracies, weights_history) if track_weights else (train_losses, train_accuracies, val_losses, val_accuracies)

def plot_training_history(train_losses, train_accuracies, val_losses, val_accuracies):
    fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))
    # Plot losses
    ax1.plot(train_losses, label='Training Loss')
    ax1.plot(val_losses, label='Validation Loss')
    ax1.set_title('Training and Validation Loss')
    ax1.set_xlabel('Epoch')
    ax1.set_ylabel('Loss')
    ax1.legend()
    # Plot accuracies
    ax2.plot(train_accuracies, label='Training Accuracy')
    ax2.plot(val_accuracies, label='Validation Accuracy')
    ax2.set_title('Training and Validation Accuracy')
    ax2.set_xlabel('Epoch')
    ax2.set_ylabel('Accuracy')
    ax2.legend()
    plt.tight_layout()
    return fig

def plot_confusion_matrix(y_true, y_pred, n_classes):
    from sklearn.metrics import confusion_matrix
    import seaborn as sns
    cm = confusion_matrix(y_true, y_pred)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=[f'Class {i}' for i in range(n_classes)],
                yticklabels=[f'Class {i}' for i in range(n_classes)])
    plt.title('Confusion Matrix')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.tight_layout()
    return plt.gcf()

def plot_classification_metrics(y_true, y_pred, n_classes):
    from sklearn.metrics import classification_report
    import pandas as pd
    report = classification_report(y_true, y_pred, output_dict=True)
    df = pd.DataFrame(report).transpose()
    df = df.drop('support', axis=1)
    df = df.round(3)
    return df

def visualize_weights(model):
    weights = []
    for layer in model.layers:
        # Move to CPU before converting, consistent with the training loop
        weights.append(layer.weight.detach().cpu().numpy())
    n_layers = len(weights)
    fig, axes = plt.subplots(1, n_layers, figsize=(5 * n_layers, 5))
    if n_layers == 1:
        axes = [axes]
    for i, (weight, ax) in enumerate(zip(weights, axes)):
        im = ax.imshow(weight, cmap='coolwarm')
        ax.set_title(f'Layer {i+1} Weights')
        plt.colorbar(im, ax=ax)
    plt.tight_layout()
    return fig

def plot_weight_optimization(weights_history):
    # Plot how the first input weight of each first-layer neuron changes over epochs
    weights_history = np.array(weights_history)
    fig, ax = plt.subplots(figsize=(8, 4))
    for i in range(weights_history.shape[1]):
        ax.plot(weights_history[:, i, 0], label=f'Neuron {i+1}')
    ax.set_title('First Layer Weights Optimization (first input weight per neuron)')
    ax.set_xlabel('Epoch')
    ax.set_ylabel('Weight Value')
    ax.legend()
    plt.tight_layout()
    return fig

def visualize_network(input_size, hidden_sizes, output_size):
    G = nx.DiGraph()
    layers = [input_size] + hidden_sizes + [output_size]
    pos = {}
    node_labels = {}
    y_gap = 1.5
    x_gap = 2
    # Add one node per unit, positioned by layer (x) and vertically centered (y)
    for l, n_nodes in enumerate(layers):
        for n in range(n_nodes):
            node_id = f'L{l}N{n}'
            G.add_node(node_id, layer=l)
            pos[node_id] = (l * x_gap, -n * y_gap + (n_nodes - 1) * y_gap / 2)
            if l == 0:
                node_labels[node_id] = f'In{n+1}'
            elif l == len(layers) - 1:
                node_labels[node_id] = f'Out{n+1}'
            else:
                node_labels[node_id] = f'H{l}-{n+1}'
    # Add edges (fully connected between consecutive layers)
    for l in range(len(layers) - 1):
        for n1 in range(layers[l]):
            for n2 in range(layers[l + 1]):
                G.add_edge(f'L{l}N{n1}', f'L{l+1}N{n2}')
    fig, ax = plt.subplots(figsize=(2 * len(layers), 6))
    nx.draw(G, pos, ax=ax, with_labels=True, labels=node_labels, node_size=1000, node_color='skyblue', arrowsize=10)
    ax.set_title('MLP Architecture')
    plt.tight_layout()
    return fig
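

# --- Example usage (illustrative sketch, not part of the original module) ---
# A minimal end-to-end run tying the pieces above together: generate and split a
# synthetic dataset, build and train an MLP, then produce the plots. The
# hyperparameters below (sample count, layer sizes, epochs, learning rate) are
# arbitrary illustrative choices, not values prescribed by this module.
if __name__ == "__main__":
    X, y = generate_dataset(n_samples=1000, n_features=4, n_classes=3)
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = split_data(X, y, val_pct=0.15, test_pct=0.15)

    model = MLP(input_size=4, hidden_sizes=[16, 8], output_size=3,
                activations=['ReLU', 'Tanh'])
    history = train_model(model, X_train, y_train, X_val, y_val,
                          epochs=50, learning_rate=0.01, track_weights=True)
    train_losses, train_accuracies, val_losses, val_accuracies, weights_history = history

    # Evaluate on the held-out test split (forward returns logits; argmax gives labels)
    model.eval()
    with torch.no_grad():
        test_logits = model(torch.FloatTensor(X_test))
        y_pred = test_logits.argmax(dim=1).numpy()

    plot_training_history(train_losses, train_accuracies, val_losses, val_accuracies)
    plot_confusion_matrix(y_test, y_pred, n_classes=3)
    print(plot_classification_metrics(y_test, y_pred, n_classes=3))
    visualize_weights(model)
    plot_weight_optimization(weights_history)
    visualize_network(input_size=4, hidden_sizes=[16, 8], output_size=3)
    plt.show()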