Spaces:

AashishAIHub
/

DataScience

Running

App Files Files Community

DataScience / DeepLearning /index.html

AashishAIHub

add new topics

ccd63d1 3 months ago

raw

history blame

265 kB

	<!DOCTYPE html>
	<html lang="en">

	<head>
	<meta charset="UTF-8">
	<meta name="viewport" content="width=device-width, initial-scale=1.0">
	<title>Complete Deep Learning & Computer Vision Curriculum</title>
	<style>
	* {
	margin: 0;
	padding: 0;
	box-sizing: border-box;
	}

	:root {
	--bg: #0f1419;
	--surface: #1a1f2e;
	--text: #e4e6eb;
	--text-dim: #b0b7c3;
	--cyan: #00d4ff;
	--orange: #ff6b35;
	--green: #00ff88;
	--yellow: #ffa500;
	}

	body {
	font-family: -apple-system, BlinkMacSystemFont, 'Segoe UI', Roboto, sans-serif;
	background: var(--bg);
	color: var(--text);
	line-height: 1.6;
	overflow-x: hidden;
	}

	.container {
	max-width: 1400px;
	margin: 0 auto;
	padding: 20px;
	}

	header {
	text-align: center;
	margin-bottom: 40px;
	padding: 30px 0;
	border-bottom: 2px solid var(--cyan);
	}

	h1 {
	font-size: 2.5em;
	background: linear-gradient(135deg, var(--cyan), var(--orange));
	background-clip: text;
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	margin-bottom: 10px;
	}

	.subtitle {
	color: var(--text-dim);
	font-size: 1.1em;
	}

	.dashboard {
	display: none;
	}

	.dashboard.active {
	display: block;
	}

	.grid {
	display: grid;
	grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));
	gap: 25px;
	margin: 40px 0;
	}

	.card {
	background: linear-gradient(135deg, rgba(0, 212, 255, 0.1), rgba(255, 107, 53, 0.1));
	border: 2px solid var(--cyan);
	border-radius: 12px;
	padding: 30px;
	cursor: pointer;
	transition: all 0.3s ease;
	text-align: center;
	}

	.card:hover {
	transform: translateY(-5px);
	box-shadow: 0 10px 30px rgba(0, 212, 255, 0.2);
	border-color: var(--orange);
	}

	.card-icon {
	font-size: 3em;
	margin-bottom: 15px;
	}

	.card h3 {
	color: var(--cyan);
	font-size: 1.5em;
	margin-bottom: 10px;
	}

	.card p {
	color: var(--text-dim);
	font-size: 0.95em;
	}

	.category-label {
	display: inline-block;
	margin-top: 10px;
	padding: 5px 12px;
	background: rgba(0, 212, 255, 0.2);
	border-radius: 20px;
	font-size: 0.85em;
	color: var(--green);
	}

	.module {
	display: none;
	}

	.module.active {
	display: block;
	animation: fadeIn 0.3s ease;
	}

	@keyframes fadeIn {
	from {
	opacity: 0;
	}

	to {
	opacity: 1;
	}
	}

	.btn-back {
	padding: 10px 20px;
	background: var(--orange);
	color: var(--bg);
	border: none;
	border-radius: 6px;
	cursor: pointer;
	font-weight: 600;
	margin-bottom: 25px;
	transition: all 0.3s ease;
	}

	.btn-back:hover {
	background: var(--cyan);
	}

	.tabs {
	display: flex;
	gap: 10px;
	margin-bottom: 30px;
	flex-wrap: wrap;
	justify-content: center;
	border-bottom: 1px solid rgba(0, 212, 255, 0.2);
	padding-bottom: 15px;
	overflow-x: auto;
	}

	.tab-btn {
	padding: 10px 20px;
	background: var(--surface);
	color: var(--text);
	border: 2px solid transparent;
	border-radius: 6px;
	cursor: pointer;
	font-size: 0.95em;
	transition: all 0.3s ease;
	font-weight: 500;
	white-space: nowrap;
	}

	.tab-btn:hover {
	background: rgba(0, 212, 255, 0.1);
	border-color: var(--cyan);
	}

	.tab-btn.active {
	background: var(--cyan);
	color: var(--bg);
	border-color: var(--cyan);
	}

	.tab {
	display: none;
	}

	.tab.active {
	display: block;
	animation: fadeIn 0.3s ease;
	}

	.section {
	background: var(--surface);
	border: 1px solid rgba(0, 212, 255, 0.2);
	border-radius: 10px;
	padding: 30px;
	margin-bottom: 25px;
	transition: all 0.3s ease;
	}

	.section:hover {
	border-color: var(--cyan);
	box-shadow: 0 0 20px rgba(0, 212, 255, 0.1);
	}

	h2 {
	color: var(--cyan);
	font-size: 1.8em;
	margin-bottom: 15px;
	}

	h3 {
	color: var(--orange);
	font-size: 1.3em;
	margin-top: 20px;
	margin-bottom: 12px;
	}

	h4 {
	color: var(--green);
	font-size: 1.1em;
	margin-top: 15px;
	margin-bottom: 10px;
	}

	p {
	margin-bottom: 15px;
	line-height: 1.8;
	}

	ul {
	margin-left: 20px;
	margin-bottom: 15px;
	}

	ul li {
	margin-bottom: 8px;
	}

	.info-box {
	background: linear-gradient(135deg, rgba(0, 212, 255, 0.1), rgba(255, 107, 53, 0.1));
	border: 1px solid var(--cyan);
	border-radius: 8px;
	padding: 20px;
	margin: 20px 0;
	}

	.box-title {
	color: var(--orange);
	font-weight: 700;
	margin-bottom: 10px;
	font-size: 1.1em;
	}

	.box-content {
	color: var(--text-dim);
	line-height: 1.7;
	}

	.formula {
	background: rgba(0, 212, 255, 0.1);
	border: 1px solid var(--cyan);
	border-radius: 8px;
	padding: 20px;
	margin: 20px 0;
	font-family: 'Courier New', monospace;
	overflow-x: auto;
	line-height: 1.8;
	color: var(--cyan);
	}

	.callout {
	border-left: 4px solid;
	padding: 15px;
	margin: 20px 0;
	border-radius: 6px;
	}

	.callout.tip {
	border-left-color: var(--green);
	background: rgba(0, 255, 136, 0.05);
	}

	.callout.warning {
	border-left-color: var(--yellow);
	background: rgba(255, 165, 0, 0.05);
	}

	.callout.insight {
	border-left-color: var(--cyan);
	background: rgba(0, 212, 255, 0.05);
	}

	.callout-title {
	font-weight: 700;
	margin-bottom: 8px;
	}

	.list-item {
	display: flex;
	gap: 12px;
	margin: 12px 0;
	padding: 12px;
	background: rgba(0, 212, 255, 0.05);
	border-left: 3px solid var(--cyan);
	border-radius: 4px;
	}

	.list-num {
	color: var(--orange);
	font-weight: 700;
	min-width: 30px;
	}

	table {
	width: 100%;
	border-collapse: collapse;
	margin: 20px 0;
	}

	th,
	td {
	padding: 12px;
	text-align: left;
	border: 1px solid rgba(0, 212, 255, 0.2);
	}

	th {
	background: rgba(0, 212, 255, 0.1);
	color: var(--cyan);
	font-weight: 700;
	}

	.viz-container {
	background: rgba(0, 212, 255, 0.02);
	border: 1px solid rgba(0, 212, 255, 0.2);
	border-radius: 8px;
	padding: 20px;
	margin: 20px 0;
	display: flex;
	justify-content: center;
	overflow-x: auto;
	}

	.viz-controls {
	display: flex;
	gap: 10px;
	margin-top: 20px;
	justify-content: center;
	flex-wrap: wrap;
	}

	.btn-viz {
	padding: 10px 20px;
	background: var(--cyan);
	color: var(--bg);
	border: none;
	border-radius: 6px;
	font-weight: 600;
	cursor: pointer;
	font-size: 0.95em;
	transition: all 0.3s ease;
	}

	.btn-viz:hover {
	background: var(--orange);
	transform: scale(1.05);
	}

	canvas {
	max-width: 100%;
	height: auto;
	}

	@media (max-width: 768px) {
	h1 {
	font-size: 1.8em;
	}

	.tabs {
	flex-direction: column;
	}

	.tab-btn {
	width: 100%;
	}

	.grid {
	grid-template-columns: 1fr;
	}

	canvas {
	width: 100% !important;
	height: auto !important;
	}
	}
	</style>
	</head>

	<body>
	<div class="container">
	<!-- MAIN DASHBOARD -->
	<div id="dashboard" class="dashboard active">
	<header>
	<h1>🧠 Complete Deep Learning & Computer Vision</h1>
	<p class="subtitle">Comprehensive Curriculum \| Foundations to Advanced Applications</p>
	</header>

	<div style="text-align: center; margin-bottom: 40px;">
	<p style="color: var(--text-dim); font-size: 1.1em;">
	Master all aspects of deep learning and computer vision. 25+ modules covering neural networks, CNNs,
	object detection, GANs, and more.
	</p>
	</div>

	<div class="grid" id="modulesGrid"></div>
	</div>

	<!-- MODULES CONTAINER -->
	<div id="modulesContainer"></div>
	</div>

	<script>
	const modules = [
	// Module 1: Deep Learning Foundations
	{
	id: "nn-basics",
	title: "Introduction to Neural Networks",
	icon: "🧬",
	category: "Foundations",
	color: "#0088ff",
	description: "Biological vs. Artificial neurons and network architecture"
	},
	{
	id: "perceptron",
	title: "The Perceptron",
	icon: "⚙️",
	category: "Foundations",
	color: "#0088ff",
	description: "Single layer networks and their limitations"
	},
	{
	id: "mlp",
	title: "Multi-Layer Perceptron (MLP)",
	icon: "🏗️",
	category: "Foundations",
	color: "#0088ff",
	description: "Hidden layers and deep architectures"
	},
	{
	id: "activation",
	title: "Activation Functions",
	icon: "⚡",
	category: "Foundations",
	color: "#0088ff",
	description: "Sigmoid, ReLU, Tanh, Leaky ReLU, ELU, Softmax"
	},
	{
	id: "weight-init",
	title: "Weight Initialization",
	icon: "🎯",
	category: "Foundations",
	color: "#0088ff",
	description: "Xavier, He, Random initialization strategies"
	},
	{
	id: "loss",
	title: "Loss Functions",
	icon: "📉",
	category: "Foundations",
	color: "#0088ff",
	description: "MSE, Binary Cross-Entropy, Categorical Cross-Entropy"
	},
	{
	id: "optimizers",
	title: "Optimizers",
	icon: "🎯",
	category: "Training",
	color: "#00ff00",
	description: "SGD, Momentum, Adam, Adagrad, RMSprop"
	},
	{
	id: "backprop",
	title: "Forward & Backpropagation",
	icon: "⬅️",
	category: "Training",
	color: "#00ff00",
	description: "Chain rule and gradient computation"
	},
	{
	id: "regularization",
	title: "Regularization",
	icon: "🛡️",
	category: "Training",
	color: "#00ff00",
	description: "L1/L2, Dropout, Early Stopping, Batch Norm"
	},
	{
	id: "batch-norm",
	title: "Batch Normalization",
	icon: "⚙️",
	category: "Training",
	color: "#00ff00",
	description: "Stabilizing and speeding up training"
	},
	// Module 2: Computer Vision Fundamentals
	{
	id: "cv-intro",
	title: "CV Fundamentals",
	icon: "👁️",
	category: "Computer Vision",
	color: "#ff6b35",
	description: "Why ANNs fail with images, parameter explosion"
	},
	{
	id: "conv-layer",
	title: "Convolutional Layers",
	icon: "🖼️",
	category: "Computer Vision",
	color: "#ff6b35",
	description: "Kernels, filters, feature maps, stride, padding"
	},
	{
	id: "pooling",
	title: "Pooling Layers",
	icon: "📦",
	category: "Computer Vision",
	color: "#ff6b35",
	description: "Max pooling, average pooling, spatial reduction"
	},
	{
	id: "cnn-basics",
	title: "CNN Architecture",
	icon: "🏗️",
	category: "Computer Vision",
	color: "#ff6b35",
	description: "Combining conv, pooling, and fully connected layers"
	},
	{
	id: "viz-filters",
	title: "Visualizing CNNs",
	icon: "🔍",
	category: "Computer Vision",
	color: "#ff6b35",
	description: "What filters learn: edges → shapes → objects"
	},
	// Module 3: Advanced CNN Architectures
	{
	id: "lenet",
	title: "LeNet-5",
	icon: "🔢",
	category: "CNN Architectures",
	color: "#ff00ff",
	description: "Classic digit recognizer (MNIST)"
	},
	{
	id: "alexnet",
	title: "AlexNet",
	icon: "🌟",
	category: "CNN Architectures",
	color: "#ff00ff",
	description: "The breakthrough in deep computer vision (2012)"
	},
	{
	id: "vgg",
	title: "VGGNet",
	icon: "📊",
	category: "CNN Architectures",
	color: "#ff00ff",
	description: "VGG-16/19: Deep networks with small filters"
	},
	{
	id: "resnet",
	title: "ResNet",
	icon: "🌉",
	category: "CNN Architectures",
	color: "#ff00ff",
	description: "Skip connections, solving vanishing gradients"
	},
	{
	id: "inception",
	title: "InceptionNet (GoogLeNet)",
	icon: "🎯",
	category: "CNN Architectures",
	color: "#ff00ff",
	description: "1x1 convolutions, multi-scale feature extraction"
	},
	{
	id: "mobilenet",
	title: "MobileNet",
	icon: "📱",
	category: "CNN Architectures",
	color: "#ff00ff",
	description: "Depth-wise separable convolutions for efficiency"
	},
	{
	id: "transfer-learning",
	title: "Transfer Learning",
	icon: "🔄",
	category: "CNN Architectures",
	color: "#ff00ff",
	description: "Fine-tuning and leveraging pre-trained models"
	},
	// Module 4: Object Detection & Segmentation
	{
	id: "localization",
	title: "Object Localization",
	icon: "📍",
	category: "Detection",
	color: "#00ff00",
	description: "Bounding boxes and classification together"
	},
	{
	id: "rcnn",
	title: "R-CNN Family",
	icon: "🎯",
	category: "Detection",
	color: "#00ff00",
	description: "R-CNN, Fast R-CNN, Faster R-CNN"
	},
	{
	id: "yolo",
	title: "YOLO",
	icon: "⚡",
	category: "Detection",
	color: "#00ff00",
	description: "Real-time object detection (v3, v5, v8)"
	},
	{
	id: "ssd",
	title: "SSD",
	icon: "🚀",
	category: "Detection",
	color: "#00ff00",
	description: "Single Shot MultiBox Detector"
	},
	{
	id: "semantic-seg",
	title: "Semantic Segmentation",
	icon: "🖌️",
	category: "Segmentation",
	color: "#00ff00",
	description: "Pixel-level classification (U-Net)"
	},
	{
	id: "instance-seg",
	title: "Instance Segmentation",
	icon: "👥",
	category: "Segmentation",
	color: "#00ff00",
	description: "Mask R-CNN and separate object instances"
	},
	{
	id: "face-recog",
	title: "Face Recognition",
	icon: "👤",
	category: "Segmentation",
	color: "#00ff00",
	description: "Siamese networks and triplet loss"
	},
	// Module 5: Generative Models
	{
	id: "autoencoders",
	title: "Autoencoders",
	icon: "🔀",
	category: "Generative",
	color: "#ffaa00",
	description: "Encoder-decoder, latent space, denoising"
	},
	{
	id: "gans",
	title: "GANs (Generative Adversarial Networks)",
	icon: "🎮",
	category: "Generative",
	color: "#ffaa00",
	description: "Generator vs. Discriminator, DCGAN"
	},
	{
	id: "diffusion",
	title: "Diffusion Models",
	icon: "🌊",
	category: "Generative",
	color: "#ffaa00",
	description: "Foundation of Stable Diffusion and DALL-E"
	},
	// Additional Advanced Topics
	{
	id: "rnn",
	title: "RNNs & LSTMs",
	icon: "🔄",
	category: "Sequence",
	color: "#ff6b35",
	description: "Recurrent networks for sequential data"
	},
	{
	id: "transformers",
	title: "Transformers",
	icon: "🔗",
	category: "Sequence",
	color: "#ff6b35",
	description: "Attention mechanisms and modern architectures"
	},
	{
	id: "bert",
	title: "BERT & NLP Transformers",
	icon: "📚",
	category: "NLP",
	color: "#ff6b35",
	description: "Bidirectional transformers for language"
	},
	{
	id: "gpt",
	title: "GPT & Language Models",
	icon: "💬",
	category: "NLP",
	color: "#ff6b35",
	description: "Autoregressive models and text generation"
	},
	{
	id: "vit",
	title: "Vision Transformers (ViT)",
	icon: "🎨",
	category: "Vision",
	color: "#ff6b35",
	description: "Transformers applied to image data"
	},
	{
	id: "gnn",
	title: "Graph Neural Networks",
	icon: "🕸️",
	category: "Advanced",
	color: "#9900ff",
	description: "Deep learning on non-Euclidean graph data"
	}
	];

	// Comprehensive content for all modules
	const MODULE_CONTENT = {
	"nn-basics": {
	overview: `
	<h3>What are Neural Networks?</h3>
	<p>Neural Networks are computational models inspired by the human brain's structure. They consist of interconnected nodes (neurons) organized in layers that process information through weighted connections.</p>

	<h3>Why Use Neural Networks?</h3>
	<ul>
	<li><strong>Universal Approximation:</strong> Can theoretically approximate any continuous function</li>
	<li><strong>Feature Learning:</strong> Automatically discover representations from raw data</li>
	<li><strong>Adaptability:</strong> Learn from examples without explicit programming</li>
	<li><strong>Parallel Processing:</strong> Highly parallelizable for modern hardware</li>
	</ul>

	<div class="callout tip">
	<div class="callout-title">✅ Advantages</div>
	• Non-linear problem solving<br>
	• Robust to noisy data<br>
	• Works with incomplete information<br>
	• Continuous learning capability
	</div>

	<div class="callout warning">
	<div class="callout-title">⚠️ Disadvantages</div>
	• Requires large amounts of training data<br>
	• Computationally expensive<br>
	• "Black box" - difficult to interpret<br>
	• Prone to overfitting without regularization
	</div>
	`,
	concepts: `
	<h3>Core Components</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Neurons (Nodes):</strong> Basic computational units that receive inputs, apply weights, add bias, and apply activation function</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Layers:</strong> Input layer (receives data), Hidden layers (feature extraction), Output layer (predictions)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Weights:</strong> Parameters learned during training that determine connection strength</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Bias:</strong> Allows shifting the activation function for better fitting</div>
	</div>
	<div class="list-item">
	<div class="list-num">05</div>
	<div><strong>Activation Function:</strong> Introduces non-linearity (ReLU, Sigmoid, Tanh)</div>
	</div>
	`,
	applications: `
	<h3>Real-World Applications</h3>
	<div class="info-box">
	<div class="box-title">🏥 Healthcare</div>
	<div class="box-content">Disease diagnosis, medical image analysis, drug discovery, patient risk prediction</div>
	</div>
	<div class="info-box">
	<div class="box-title">💰 Finance</div>
	<div class="box-content">Fraud detection, algorithmic trading, credit scoring, portfolio optimization</div>
	</div>
	<div class="info-box">
	<div class="box-title">🛒 E-commerce</div>
	<div class="box-content">Recommendation systems, demand forecasting, customer segmentation, price optimization</div>
	</div>
	`
	},
	"activation": {
	overview: `
	<h3>What are Activation Functions?</h3>
	<p>Activation functions introduce non-linearity into neural networks, enabling them to learn complex patterns. Without activation functions, a neural network would be just a linear regression model regardless of depth.</p>

	<h3>Why Do We Need Them?</h3>
	<ul>
	<li><strong>Non-linearity:</strong> Real-world problems are rarely linear</li>
	<li><strong>Complex Pattern Learning:</strong> Enable learning of intricate decision boundaries</li>
	<li><strong>Gradient Flow:</strong> Control how gradients propagate during backpropagation</li>
	<li><strong>Range Normalization:</strong> Keep activations in manageable ranges</li>
	</ul>

	<h3>Common Activation Functions Comparison</h3>
	<table>
	<tr>
	<th>Function</th>
	<th>Range</th>
	<th>Best Use</th>
	<th>Issue</th>
	</tr>
	<tr>
	<td>ReLU</td>
	<td>[0, ∞)</td>
	<td>Hidden layers (default)</td>
	<td>Dying ReLU problem</td>
	</tr>
	<tr>
	<td>Sigmoid</td>
	<td>(0, 1)</td>
	<td>Binary classification output</td>
	<td>Vanishing gradients</td>
	</tr>
	<tr>
	<td>Tanh</td>
	<td>(-1, 1)</td>
	<td>RNNs, zero-centered</td>
	<td>Vanishing gradients</td>
	</tr>
	<tr>
	<td>Leaky ReLU</td>
	<td>(-∞, ∞)</td>
	<td>Fixes dying ReLU</td>
	<td>Extra hyperparameter</td>
	</tr>
	<tr>
	<td>Softmax</td>
	<td>(0, 1) sum=1</td>
	<td>Multi-class output</td>
	<td>Computationally expensive</td>
	</tr>
	</table>
	`,
	concepts: `
	<h3>Key Properties</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Differentiability:</strong> Must have derivatives for backpropagation to work</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Monotonicity:</strong> Preferably monotonic for easier optimization</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Zero-Centered:</strong> Helps with faster convergence (Tanh)</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Computational Efficiency:</strong> Should be fast to compute (ReLU wins)</div>
	</div>

	<div class="callout tip">
	<div class="callout-title">💡 Best Practices</div>
	• Use <strong>ReLU</strong> for hidden layers by default<br>
	• Use <strong>Sigmoid</strong> for binary classification output<br>
	• Use <strong>Softmax</strong> for multi-class classification<br>
	• Try <strong>Leaky ReLU</strong> or <strong>ELU</strong> if ReLU neurons are dying<br>
	• Avoid Sigmoid/Tanh in deep networks (gradient vanishing)
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🧠 Neural Network Design</div>
	<div class="box-content">
	Critical choice for every neural network - affects training speed, convergence, and final accuracy
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🎯 Task-Specific Selection</div>
	<div class="box-content">
	Different tasks need different outputs: Sigmoid for binary, Softmax for multi-class, Linear for regression
	</div>
	</div>
	`,
	math: `
	<h3>Derivatives: The Backprop Fuel</h3>
	<p>Activation functions must be differentiable for backpropagation to work. Let's look at the derivatives on paper:</p>

	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Sigmoid:</strong> σ(z) = 1 / (1 + e⁻ᶻ)<br>
	<strong>Derivative:</strong> σ'(z) = σ(z)(1 - σ(z))<br>
	<span class="formula-caption">Max gradient is 0.25 (at z=0). This is why deep networks vanish!</span></div>
	</div>

	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Tanh:</strong> tanh(z) = (eᶻ - e⁻ᶻ) / (eᶻ + e⁻ᶻ)<br>
	<strong>Derivative:</strong> tanh'(z) = 1 - tanh²(z)<br>
	<span class="formula-caption">Max gradient is 1.0 (at z=0). Better than Sigmoid, but still vanishes.</span></div>
	</div>

	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>ReLU:</strong> max(0, z)<br>
	<strong>Derivative:</strong> 1 if z > 0, else 0<br>
	<span class="formula-caption">Gradient is 1.0 for all positive z. No vanishing! But 0 for negative (Dying ReLU).</span></div>
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: The Chain Effect</div>
	Each layer multiplies the gradient by σ'(z). <br>
	For 10 Sigmoid layers: Total gradient ≈ (0.25)¹⁰ ≈ <strong>0.00000095</strong><br>
	This is the mathematical proof of the Vanishing Gradient Problem!
	</div>
	`
	},
	"conv-layer": {
	overview: `
	<h3>What are Convolutional Layers?</h3>
	<p>Convolutional layers are the fundamental building blocks of CNNs. They apply learnable filters (kernels) across input data to detect local patterns like edges, textures, and shapes.</p>

	<h3>Why Use Convolutions Instead of Fully Connected Layers?</h3>
	<ul>
	<li><strong>Parameter Efficiency:</strong> Share weights across spatial locations (fewer parameters)</li>
	<li><strong>Translation Invariance:</strong> Detect features regardless of position</li>
	<li><strong>Local Connectivity:</strong> Each neuron sees

	only a small region (receptive field)</li>
	<li><strong>Hierarchical Learning:</strong> Build complex features from simple ones</li>
	</ul>

	<div class="callout insight">
	<div class="callout-title">🔍 Example: Parameter Comparison</div>
	For a 224×224 RGB image:<br>
	• <strong>Fully Connected:</strong> 224 × 224 × 3 × 1000 = 150M parameters (for 1000 neurons)<br>
	• <strong>Convolutional (3×3):</strong> 3 × 3 × 3 × 64 = 1,728 parameters (for 64 filters)<br>
	<strong>Result:</strong> 87,000x fewer parameters! 🚀
	</div>

	<div class="callout tip">
	<div class="callout-title">✅ Advantages</div>
	• Drastically reduced parameters<br>
	• Spatial hierarchy (edges → textures → parts → objects)<br>
	• GPU-friendly (highly parallelizable)<br>
	• Built-in translation equivariance
	</div>

	<div class="callout warning">
	<div class="callout-title">⚠️ Disadvantages</div>
	• Not rotation invariant (require data augmentation)<br>
	• Fixed receptive field size<br>
	• Memory intensive during training<br>
	• Require careful hyperparameter tuning (kernel size, stride, padding)
	</div>
	`,
	concepts: `
	<h3>Key Hyperparameters</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Kernel/Filter Size:</strong> Typically 3×3 or 5×5. Smaller = more layers needed, larger = more parameters</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Stride:</strong> Step size when sliding filter. Stride=1 (preserves size), Stride=2 (downsamples by 2×)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Padding:</strong> Add zeros around borders. 'SAME' keeps size, 'VALID' shrinks output</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Number of Filters:</strong> Each filter learns different features. More filters = more capacity but slower</div>
	</div>
	<div class="list-item">
	<div class="list-num">05</div>
	<div><strong>Dilation:</strong> Spacing between kernel elements. Increases receptive field without adding parameters</div>
	</div>

	<div class="formula">
	Output Size Formula:<br>
	W_out = floor((W_in + 2×padding - kernel_size) / stride) + 1<br>
	H_out = floor((H_in + 2×padding - kernel_size) / stride) + 1
	</div>
	`,
	math: `
	<h3>The Mathematical Operation: Cross-Correlation</h3>
	<p>In deep learning, what we call "convolution" is mathematically "cross-correlation". It is a local dot product of the kernel and image patch.</p>

	<div class="formula">
	S(i, j) = (I * K)(i, j) = Σ_m Σ_n I(i+m, j+n) K(m, n)
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Manual Convolution</div>
	Input (3x3):<br>
	[1 2 0]<br>
	[0 1 1]<br>
	[1 0 2]<br>
	<br>
	Kernel (2x2):<br>
	[1 0]<br>
	[0 1]<br>
	<br>
	Calculation:<br>
	Step 1 (Top-Left): (1x1) + (2x0) + (0x0) + (1x1) = <strong>2</strong><br>
	Step 2 (Top-Right): (2x1) + (0x0) + (1x0) + (1x1) = <strong>3</strong><br>
	... Output is a 2x2 matrix.
	</div>

	<h3>Backprop through Conv</h3>
	<p>Calculated using the same formula but with the kernel flipped vertically and horizontally (true convolution)!</p>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🔍 Feature Extraction</div>
	<div class="box-content">
	Early layers learn edges (Gabor-like filters), middle layers learn textures, deep layers learn specific object parts (eyes, wheels).
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🎨 Image Processing</div>
	<div class="box-content">
	Blurring, sharpening, and edge detection in Photoshop/GIMP are all done with 2D convolutions using fixed kernels.
	</div>
	</div>
	`
	},
	"yolo": {
	overview: `
	<h3>What is YOLO?</h3>
	<p>YOLO (You Only Look Once) treats object detection as a single regression problem, going directly from image pixels to bounding box coordinates and class probabilities in one forward pass.</p>

	<h3>Why YOLO Over R-CNN?</h3>
	<ul>
	<li><strong>Speed:</strong> 45+ FPS (real-time) vs R-CNN's ~0.05 FPS</li>
	<li><strong>Global Context:</strong> Sees entire image during training (fewer background errors)</li>
	<li><strong>One Network:</strong> Unlike R-CNN's multi-stage pipeline</li>
	<li><strong>End-to-End Training:</strong> Optimize detection directly</li>
	</ul>

	<div class="callout tip">
	<div class="callout-title">✅ Advantages</div>
	• <strong>Lightning Fast:</strong> Real-time inference (YOLOv8 at 100+ FPS)<br>
	• <strong>Simple Architecture:</strong> Single network, easy to train<br>
	• <strong>Generalizes Well:</strong> Works on natural images and artwork<br>
	• <strong>Small Model Size:</strong> Can run on edge devices (mobile, IoT)
	</div>

	<div class="callout warning">
	<div class="callout-title">⚠️ Disadvantages</div>
	• <strong>Struggles with Small Objects:</strong> Grid limitation affects tiny items<br>
	• <strong>Localization Errors:</strong> Less precise than two-stage detectors<br>
	• <strong>Limited Objects per Cell:</strong> Can't detect many close objects<br>
	• <strong>Aspect Ratio Issues:</strong> Struggles with unusual object shapes
	</div>

	<h3>YOLO Evolution</h3>
	<table>
	<tr>
	<th>Version</th>
	<th>Year</th>
	<th>Key Innovation</th>
	<th>mAP</th>
	</tr>
	<tr>
	<td>YOLOv1</td>
	<td>2015</td>
	<td>Original single-shot detector</td>
	<td>63.4%</td>
	</tr>
	<tr>
	<td>YOLOv3</td>
	<td>2018</td>
	<td>Multi-scale predictions</td>
	<td>57.9% (faster)</td>
	</tr>
	<tr>
	<td>YOLOv5</td>
	<td>2020</td>
	<td>PyTorch, Auto-augment</td>
	<td>~50% (optimized)</td>
	</tr>
	<tr>
	<td>YOLOv8</td>
	<td>2023</td>
	<td>Anchor-free, SOTA speed</td>
	<td>53.9% (real-time)</td>
	</tr>
	</table>
	`,
	concepts: `
	<h3>How YOLO Works (3 Steps)</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Grid Division:</strong> Divide image into S×S grid (e.g., 7×7). Each cell predicts B bounding boxes</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Predictions Per Cell:</strong> Each box predicts (x, y, w, h, confidence) + class probabilities</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Non-Max Suppression:</strong> Remove duplicate detections, keep highest confidence boxes</div>
	</div>

	<div class="formula">
	Output Tensor Shape (YOLOv1):<br>
	S × S × (B × 5 + C)<br>
	Example: 7 × 7 × (2 × 5 + 20) = 7 × 7 × 30<br>
	<br>
	Where:<br>
	• S = grid size (7)<br>
	• B = boxes per cell (2)<br>
	• 5 = (x, y, w, h, confidence)<br>
	• C = number of classes (20 for PASCAL VOC)
	</div>
	`,
	applications: `
	<h3>Industry Applications</h3>
	<div class="info-box">
	<div class="box-title">🚗 Autonomous Vehicles</div>
	<div class="box-content">
	Real-time detection of pedestrians, vehicles, traffic signs, and lane markings for self-driving cars
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🏭 Manufacturing</div>
	<div class="box-content">
	Quality control, defect detection on assembly lines, robot guidance, inventory management
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🛡️ Security & Surveillance</div>
	<div class="box-content">
	Intrusion detection, crowd monitoring, suspicious behavior analysis, license plate recognition
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🏥 Medical Imaging</div>
	<div class="box-content">
	Tumor localization, cell counting, anatomical structure detection in X-rays/CT scans
	</div>
	</div>
	`,
	math: `
	<h3>Intersection over Union (IoU)</h3>
	<p>How do we measure if a predicted box is correct? We use the geometric ratio of intersection and union.</p>
	<div class="formula">
	IoU = Area of Overlap / Area of Union
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Manual IoU</div>
	Box A (GT): [0,0,10,10] (Area=100)<br>
	Box B (Pred): [5,5,15,15] (Area=100)<br>
	1. Intersection: Area between [5,5] and [10,10] = 5x5 = 25<br>
	2. Union: Area A + Area B - Intersection = 100 + 100 - 25 = 175<br>
	3. IoU: 25 / 175 ≈ <strong>0.142</strong> (Poor match!)
	</div>

	<h3>YOLO Multi-Part Loss</h3>
	<p>YOLO uses a composite loss function combining localization, confidence, and classification errors.</p>
	<div class="formula">
	L = λ_coord Σ(Localization Loss) + Σ(Confidence Loss) + Σ(Classification Loss)
	</div>
	`
	},
	"transformers": {
	overview: `
	<h3>What are Transformers?</h3>
	<p>Transformers are neural architectures based entirely on attention mechanisms, eliminating recurrence and convolutions. Introduced in "Attention is All You Need" (2017), they revolutionized NLP and are now conquering computer vision.</p>

	<h3>Why Transformers Over RNNs/LSTMs?</h3>
	<ul>
	<li><strong>Parallelization:</strong> Process entire sequence at once (vs sequential RNNs)</li>
	<li><strong>Long-Range Dependencies:</strong> Direct connections between any two positions</li>
	<li><strong>No Gradient Vanishing:</strong> Skip connections and attention bypass depth issues</li>
	<li><strong>Scalability:</strong> Performance improves with more data and compute</li>
	</ul>

	<div class="callout tip">
	<div class="callout-title">✅ Advantages</div>
	• <strong>Superior Performance:</strong> SOTA on nearly all NLP benchmarks<br>
	• <strong>Highly Parallelizable:</strong> Train 100× faster than RNNs on TPUs/GPUs<br>
	• <strong>Transfer Learning:</strong> Pre-train once, fine-tune for many tasks<br>
	• <strong>Interpretability:</strong> Attention weights show what model focuses on<br>
	• <strong>Multi-Modal:</strong> Works for text, images, audio, video
	</div>

	<div class="callout warning">
	<div class="callout-title">⚠️ Disadvantages</div>
	• <strong>Quadratic Complexity:</strong> O(n²) in sequence length (memory intensive)<br>
	• <strong>Massive Data Requirements:</strong> Need millions of examples to train from scratch<br>
	• <strong>Computational Cost:</strong> Training GPT-3 cost ~$4.6M<br>
	• <strong>Position Encoding:</strong> Require explicit positional information<br>
	• <strong>Limited Context:</strong> Most models cap at 512-4096 tokens
	</div>

	<h3>Transformer Variants</h3>
	<table>
	<tr>
	<th>Model</th>
	<th>Type</th>
	<th>Architecture</th>
	<th>Best For</th>
	</tr>
	<tr>
	<td>BERT</td>
	<td>Encoder-only</td>
	<td>Bidirectional</td>
	<td>Understanding (classification, QA)</td>
	</tr>
	<tr>
	<td>GPT</td>
	<td>Decoder-only</td>
	<td>Autoregressive</td>
	<td>Generation (text, code)</td>
	</tr>
	<tr>
	<td>T5</td>
	<td>Encoder-Decoder</td>
	<td>Full Transformer</td>
	<td>Text-to-text tasks (translation)</td>
	</tr>
	<tr>
	<td>ViT</td>
	<td>Encoder-only</td>
	<td>Patch embeddings</td>
	<td>Image classification</td>
	</tr>
	</table>
	`,
	concepts: `
	<h3>Core Components</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Self-Attention:</strong> Each token attends to all other tokens, learning contextual relationships</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Multi-Head Attention:</strong> Multiple attention mechanisms in parallel (8-16 heads), each learning different patterns</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Positional Encoding:</strong> Add position information since attention is permutation-invariant</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Feed-Forward Networks:</strong> Two-layer MLPs applied to each position independently</div>
	</div>
	<div class="list-item">
	<div class="list-num">05</div>
	<div><strong>Layer Normalization:</strong> Stabilize training, applied before attention and FFN</div>
	</div>
	<div class="list-item">
	<div class="list-num">06</div>
	<div><strong>Residual Connections:</strong> Skip connections around each sub-layer for gradient flow</div>
	</div>

	<div class="formula">
	Self-Attention Formula:<br>
	Attention(Q, K, V) = softmax(QK<sup>T</sup> / √d<sub>k</sub>) V<br>
	<br>
	Where:<br>
	• Q = Queries (what we're looking for)<br>
	• K = Keys (what each token represents)<br>
	• V = Values (actual information to aggregate)<br>
	• d<sub>k</sub> = dimension of keys (for scaling)<br>
	<br>
	Multi-Head Attention:<br>
	MultiHead(Q,K,V) = Concat(head₁,...,head<sub>h</sub>)W<sup>O</sup><br>
	where head<sub>i</sub> = Attention(QW<sub>i</sub><sup>Q</sup>, KW<sub>i</sub><sup>K</sup>, VW<sub>i</sub><sup>V</sup>)
	</div>
	`,
	applications: `
	<h3>Revolutionary Applications</h3>
	<div class="info-box">
	<div class="box-title">💬 Large Language Models</div>
	<div class="box-content">
	<strong>ChatGPT, GPT-4, Claude:</strong> Conversational AI, code generation, creative writing, reasoning<br>
	<strong>BERT, RoBERTa:</strong> Search engines (Google), question answering, sentiment analysis
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🌐 Machine Translation</div>
	<div class="box-content">
	<strong>Google Translate, DeepL:</strong> Transformers achieved human-level translation quality<br>
	Supports 100+ languages, real-time translation
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🎨 Multi-Modal AI</div>
	<div class="box-content">
	<strong>DALL-E, Midjourney:</strong> Text-to-image generation<br>
	<strong>CLIP:</strong> Image-text understanding<br>
	<strong>Whisper:</strong> Speech recognition
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🧬 Scientific Discovery</div>
	<div class="box-content">
	<strong>AlphaFold:</strong> Protein structure prediction (Nobel Prize-worthy breakthrough)<br>
	<strong>Drug Discovery:</strong> Molecule generation and property prediction
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">💻 Code Intelligence</div>
	<div class="box-content">
	<strong>GitHub Copilot:</strong> AI pair programmer<br>
	<strong>CodeGen, AlphaCode:</strong> Automated coding, bug detection
	</div>
	</div>
	`,
	math: `
	<h3>Scaled Dot-Product Attention</h3>
	<p>The "heart" of the Transformer. It computes how much "attention" to pay to different parts of the input sequence.</p>

	<div class="formula" style="font-size: 1.3rem; text-align: center; margin: 20px 0; background: rgba(0, 212, 255, 0.05); padding: 20px; border-radius: 8px;">
	Attention(Q, K, V) = softmax( (QKᵀ) / √dₖ ) V
	</div>

	<h3>Step-by-Step Derivation</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Dot Product (QKᵀ):</strong> Compute raw similarity scores between Queries (what we want) and Keys (what we have)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Scaling (1/√dₖ):</strong> Divide by square root of key dimension. <strong>Why?</strong> With high dimensions, dot products grow large, pushing softmax into regions with vanishing gradients. Scaling prevents this.</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Softmax:</strong> Convert similarity scores into probabilities (attention weights) that sum to 1</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Weighted Sum (×V):</strong> Use attention weights to pull information from Values.</div>
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Multi-Head Attention</div>
	Instead of one big attention, we split Q, K, V into <em>h</em> heads:<br>
	1. Heads learn <strong>different aspects</strong> (e.g., syntax vs semantics)<br>
	2. Concat all heads: MultiHead = Concat(head₁, ..., headₕ)Wᴼ<br>
	3. Complexity: <strong>O(n² · d)</strong> - This is why long sequences are hard!
	</div>

	<div class="callout warning">
	<div class="callout-title">📐 Sinusoidal Positional Encoding</div>
	PE(pos, 2i) = sin(pos / 10000^{2i/d})<br>
	PE(pos, 2i+1) = cos(pos / 10000^{2i/d})<br>
	This allows the model to learn relative positions since PE(pos+k) is a linear function of PE(pos).
	</div>
	`
	},
	"perceptron": {
	overview: `
	<h3>What is a Perceptron?</h3>
	<p>The perceptron is the simplest neural network, invented in 1958. It's a binary linear classifier that makes predictions based on weighted inputs.</p>

	<div class="callout tip">
	<div class="callout-title">✅ Advantages</div>
	• Simple and fast<br>
	• Guaranteed convergence for linearly separable data<br>
	• Interpretable weights
	</div>

	<div class="callout warning">
	<div class="callout-title">⚠️ Key Limitation</div>
	<strong>Cannot solve XOR:</strong> Limited to linear decision boundaries only
	</div>
	`,
	concepts: `
	<h3>How Perceptron Works</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Weighted Sum:</strong> z = w₁x₁ + w₂x₂ + ... + b</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Step Function:</strong> Output = 1 if z ≥ 0, else 0</div>
	</div>
	<div class="formula">
	Learning Rule: w_new = w_old + α(y_true - y_pred)x
	</div>
	`,
	math: `
	<h3>Perceptron Learning Algorithm</h3>
	<p>The perceptron update rule is the simplest form of gradient descent.</p>

	<div class="formula">
	For each misclassified sample (x, y):<br>
	w ← w + α × y × x<br>
	b ← b + α × y
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Manual Training</div>
	<strong>Data:</strong> x₁ = [1, 1], y₁ = 1 \| x₂ = [0, 0], y₂ = 0<br>
	<strong>Initial:</strong> w = [0, 0], b = 0, α = 1<br>
	<br>
	<strong>Iteration 1 (x₁):</strong><br>
	z = 0×1 + 0×1 + 0 = 0 → ŷ = 1 ✓ (correct!)<br>
	<br>
	<strong>Iteration 2 (x₂):</strong><br>
	z = 0×0 + 0×0 + 0 = 0 → ŷ = 1 ✗ (wrong! y=0)<br>
	Update: w = [0,0] + 1×(0-1)×[0,0] = [0,0], b = 0 + 1×(0-1) = -1<br>
	<br>
	Now z(x₂) = 0 + 0 - 1 = -1 → ŷ = 0 ✓
	</div>

	<h3>Convergence Theorem</h3>
	<div class="formula">
	If data is linearly separable with margin γ and \|\|x\|\| ≤ R,<br>
	perceptron converges in at most (R/γ)² updates.
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">📚 Educational</div>
	<div class="box-content">
	Historical importance - first trainable neural model. Perfect for teaching ML fundamentals
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🔬 Simple Classification</div>
	<div class="box-content">
	Linearly separable problems: basic pattern recognition, simple binary decisions
	</div>
	</div>
	`
	},
	"mlp": {
	overview: `
	<h3>Multi-Layer Perceptron (MLP)</h3>
	<p>MLP adds hidden layers between input and output, enabling non-linear decision boundaries and solving the XOR problem that single perceptrons cannot.</p>

	<h3>Why MLPs?</h3>
	<ul>
	<li><strong>Universal Approximation:</strong> Can approximate any continuous function</li>
	<li><strong>Non-Linear Learning:</strong> Solves complex problems</li>
	<li><strong>Feature Extraction:</strong> Hidden layers learn hierarchical features</li>
	</ul>

	<div class="callout insight">
	<div class="callout-title">💡 The XOR Breakthrough</div>
	Single perceptron: Cannot solve XOR<br>
	MLP with 1 hidden layer (2 neurons): Solves XOR!<br>
	This proves the power of depth.
	</div>
	`,
	concepts: `
	<h3>Architecture Components</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Input Layer:</strong> Raw features (no computation)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Hidden Layers:</strong> Extract progressively abstract features</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Output Layer:</strong> Final predictions</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">📊 Tabular Data</div>
	<div class="box-content">Credit scoring, fraud detection, customer churn, sales forecasting</div>
	</div>
	<div class="info-box">
	<div class="box-title">🏭 Manufacturing</div>
	<div class="box-content">Quality control, predictive maintenance, demand forecasting</div>
	</div>
	`,
	math: `
	<h3>Neural Network Forward Pass (Matrix Form)</h3>
	<p>Vectorization is key to modern deep learning. We process entire layers as matrix multiplications.</p>

	<div class="formula">
	Layer 1: z⁽¹⁾ = W⁽¹⁾x + b⁽¹⁾ \| a⁽¹⁾ = σ(z⁽¹⁾)<br>
	Layer 2: z⁽²⁾ = W⁽²⁾a⁽¹⁾ + b⁽²⁾ \| a⁽²⁾ = σ(z⁽²⁾)<br>
	...<br>
	Layer L: ŷ = Softmax(W⁽ᴸ⁾a⁽ᴸ⁻¹⁾ + b⁽ᴸ⁾)
	</div>

	<h3>Paper & Pain: Dimensionality Audit</h3>
	<p>Understanding tensor shapes is the #1 skill for debugging neural networks.</p>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Input x:</strong> [n_features, 1]</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Weights W⁽¹⁾:</strong> [n_hidden, n_features]</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Bias b⁽¹⁾:</strong> [n_hidden, 1]</div>
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Solving XOR</div>
	Input: [0,1], Target: 1<br>
	Layer 1 (2 neurons):<br>
	z₁ = 10x₁ + 10x₂ - 5   \|   a₁ = σ(z₁)<br>
	z₂ = 10x₁ + 10x₂ - 15 \|   a₂ = σ(z₂)<br>
	Layer 2 (1 neuron):<br>
	y = σ(20a₁ - 20a₂ - 10)<br>
	<strong>Try it on paper!</strong> This specific configuration correctly outputs XOR values.
	</div>
	`
	},
	"weight-init": {
	overview: `
	<h3>Weight Initialization Strategies</h3>
	<table>
	<tr>
	<th>Method</th>
	<th>Best For</th>
	<th>Formula</th>
	</tr>
	<tr>
	<td>Xavier/Glorot</td>
	<td>Sigmoid, Tanh</td>
	<td>N(0, √(2/(n_in+n_out)))</td>
	</tr>
	<tr>
	<td>He/Kaiming</td>
	<td>ReLU</td>
	<td>N(0, √(2/n_in))</td>
	</tr>
	</table>

	<div class="callout warning">
	<div class="callout-title">⚠️ Never Initialize to Zero!</div>
	All neurons learn identical features (symmetry problem)
	</div>
	`,
	concepts: `
	<h3>Key Principles</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Variance Preservation:</strong> Keep activation variance similar across layers</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Symmetry Breaking:</strong> Different weights force different features</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🎯 Critical for Deep Networks</div>
	<div class="box-content">
	Proper initialization is essential for training networks >10 layers. Wrong init = training failure
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">⚡ Faster Convergence</div>
	<div class="box-content">
	Good initialization reduces training time by 2-10×, especially with modern optimizers
	</div>
	</div>
	`,
	math: `
	<h3>The Variance Preservation Principle</h3>
	<p>To prevent gradients from vanishing or exploding, we want the variance of the activations to remain constant across layers.</p>

	<div class="formula">
	For a linear layer: y = Σ wᵢxᵢ<br>
	Var(y) = Var(Σ wᵢxᵢ) = Σ Var(wᵢxᵢ)<br>
	Assuming w and x are independent with mean 0:<br>
	Var(wᵢxᵢ) = E[wᵢ²]E[xᵢ²] - E[wᵢ]²E[xᵢ]² = Var(wᵢ)Var(xᵢ)<br>
	So, Var(y) = n_in × Var(w) × Var(x)
	</div>

	<h3>1. Xavier (Glorot) Initialization</h3>
	<p>Goal: Var(y) = Var(x) and Var(grad_out) = Var(grad_in)</p>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Forward Pass:</strong> n_in × Var(w) = 1 ⇒ Var(w) = 1/n_in</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Backward Pass:</strong> n_out × Var(w) = 1 ⇒ Var(w) = 1/n_out</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Compromise:</strong> Var(w) = 2 / (n_in + n_out)</div>
	</div>

	<h3>2. He (Kaiming) Initialization</h3>
	<p>For ReLU activation, half the neurons are inactive (output 0), which halves the variance. We must compensate.</p>
	<div class="formula">
	Var(ReLU(y)) = 1/2 × Var(y)<br>
	To keep Var(ReLU(y)) = Var(x):<br>
	1/2 × n_in × Var(w) = 1<br>
	<strong>Var(w) = 2 / n_in</strong>
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain Calculation</div>
	If n_in = 256 and you use ReLU:<br>
	Weight Std Dev = √(2/256) = √(1/128) ≈ <strong>0.088</strong><br>
	Initializing with std=1.0 or std=0.01 would cause immediate failure in a deep net!
	</div>
	`
	},
	"loss": {
	overview: `
	<h3>Loss Functions Guide</h3>
	<table>
	<tr>
	<th>Task</th>
	<th>Loss Function</th>
	</tr>
	<tr>
	<td>Binary Classification</td>
	<td>Binary Cross-Entropy</td>
	</tr>
	<tr>
	<td>Multi-class</td>
	<td>Categorical Cross-Entropy</td>
	</tr>
	<tr>
	<td>Regression</td>
	<td>MSE or MAE</td>
	</tr>
	</table>
	`,
	concepts: `
	<h3>Common Loss Functions</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>MSE:</strong> (1/n)Σ(y - ŷ)² - Penalizes large errors</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Cross-Entropy:</strong> -Σ(y·log(ŷ)) - For classification</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🎯 Task-Dependent Selection</div>
	<div class="box-content">
	Every ML task needs appropriate loss: classification (cross-entropy), regression (MSE/MAE), ranking (triplet loss)
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">📊 Custom Losses</div>
	<div class="box-content">
	Business-specific objectives: Focal Loss (imbalanced data), Dice Loss (segmentation), Contrastive Loss (similarity learning)
	</div>
	</div>
	`,
	math: `
	<h3>Binary Cross-Entropy (BCE) Derivation</h3>
	<p>Why do we use logs? BCE is derived from Maximum Likelihood Estimation (MLE) assuming a Bernoulli distribution.</p>

	<div class="formula">
	L(ŷ, y) = -(y log(ŷ) + (1-y) log(1-ŷ))
	</div>

	<h3>Paper & Pain: Why not MSE for Classification?</h3>
	<p>If we use MSE for sigmoid output, the gradient is:</p>
	<div class="formula">
	∂L/∂w = (ŷ - y) <strong>σ'(z)</strong> x
	</div>
	<div class="callout warning">
	<div class="callout-title">⚠️ The Saturation Problem</div>
	If the model is very wrong (e.g., target 1, output 0.001), σ'(z) is near 0. <br>
	The gradient vanishes, and the model <strong>stops learning!</strong>.
	</div>

	<h3>The BCE Advantage</h3>
	<p>When using BCE, the σ'(z) term cancels out! The gradient becomes:</p>
	<div class="formula" style="font-size: 1.2rem; color: #00d4ff;">
	∂L/∂w = (ŷ - y) x
	</div>
	<div class="list-item">
	<div class="list-num">💡</div>
	<div>This is beautiful: the gradient depends <strong>only on the error</strong> (ŷ-y), not on how saturated the neuron is. This enables much faster training.</div>
	</div>
	`
	},
	"optimizers": {
	overview: `
	<h3>Optimizer Selection Guide</h3>
	<table>
	<tr>
	<th>Optimizer</th>
	<th>When to Use</th>
	</tr>
	<tr>
	<td>Adam/AdamW</td>
	<td><strong>Default choice</strong> - works 90% of time</td>
	</tr>
	<tr>
	<td>SGD + Momentum</td>
	<td>CNNs (better final accuracy with patience)</td>
	</tr>
	<tr>
	<td>RMSprop</td>
	<td>RNNs</td>
	</tr>
	</table>

	<div class="formula">
	Adam: m_t = β₁·m + (1-β₁)·∇L<br>
	v_t = β₂·v + (1-β₂)·(∇L)²<br>
	w = w - α·m_t/√(v_t)
	</div>
	`,
	concepts: `
	<h3>Optimizer Evolution</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>SGD:</strong> Simple but requires careful learning rate tuning</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Adam:</strong> Adaptive rates + momentum = works out-of-box</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🚀 Training Acceleration</div>
	<div class="box-content">
	Modern optimizers (Adam) reduce training time by 5-10× compared to basic SGD
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🎯 Architecture-Specific</div>
	<div class="box-content">
	CNNs: SGD+Momentum \| Transformers: AdamW \| RNNs: RMSprop \| Default: Adam
	</div>
	</div>
	`
	},
	"backprop": {
	overview: `
	<h3>Backpropagation Algorithm</h3>
	<p>Backprop efficiently computes gradients by applying the chain rule from output to input, enabling training of deep networks.</p>

	<h3>Why Backpropagation?</h3>
	<ul>
	<li><strong>Efficient:</strong> Computes all gradients in single backward pass</li>
	<li><strong>Scalable:</strong> Works for networks of any depth</li>
	<li><strong>Automatic:</strong> Modern frameworks do it automatically</li>
	</ul>
	`,
	concepts: `
	<div class="formula">
	Chain Rule:<br>
	∂L/∂w = ∂L/∂y × ∂y/∂z × ∂z/∂w<br>
	<br>
	For layer l:<br>
	δˡ = (W^(l+1))^T δ^(l+1) ⊙ σ'(z^l)<br>
	∂L/∂W^l = δ^l (a^(l-1))^T
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🧠 Universal Training Method</div>
	<div class="box-content">
	Every modern neural network uses backprop - from CNNs to Transformers to GANs
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🔧 Automatic Differentiation</div>
	<div class="box-content">
	PyTorch, TensorFlow implement automatic backprop - you define forward pass, framework does backward
	</div>
	</div>
	`,
	math: `
	<h3>The 4 Fundamental Equations of Backprop</h3>
	<p>Backpropagation is essentially the chain rule applied iteratively. We define the error signal δ = ∂L/∂z.</p>

	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Error at Output Layer (L):</strong><br>
	δᴸ = ∇ₐL ⊙ σ'(zᴸ)<br>
	<span class="formula-caption">Example for MSE: (aᴸ - y) ⊙ σ'(zᴸ)</span></div>
	</div>

	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Error at Layer l (Backwards):</strong><br>
	δˡ = ((Wˡ⁺¹)ᵀ δˡ⁺¹) ⊙ σ'(zˡ)</div>
	</div>

	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Gradient w.r.t Bias:</strong><br>
	∂L / ∂bˡ = δˡ</div>
	</div>

	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Gradient w.r.t Weights:</strong><br>
	∂L / ∂Wˡ = δˡ (aˡ⁻¹)ᵀ</div>
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain Walkthrough</div>
	Suppose single neuron: z = wx + b, Loss L = (σ(z) - y)²/2<br>
	1. <strong>Forward:</strong> z=2, a=σ(2)≈0.88, y=1, L=0.007<br>
	2. <strong>Backward:</strong><br>
	∂L/∂a = (a-y) = -0.12<br>
	∂a/∂z = σ(z)(1-σ(z)) = 0.88 * 0.12 = 0.1056<br>
	δ = ∂L/∂z = -0.12 * 0.1056 = -0.01267<br>
	<strong>∂L/∂w = δ * x</strong> \| <strong>∂L/∂b = δ</strong>
	</div>
	`
	},
	"regularization": {
	overview: `
	<h3>Regularization Techniques</h3>
	<table>
	<tr>
	<th>Method</th>
	<th>How It Works</th>
	<th>When to Use</th>
	</tr>
	<tr>
	<td>L2 (Ridge)</td>
	<td>Adds λΣw² to loss</td>
	<td>Keeps all features, reduces magnitude</td>
	</tr>
	<tr>
	<td>L1 (Lasso)</td>
	<td>Adds λΣ\|w\| to loss</td>
	<td>Feature selection (zeros out weights)</td>
	</tr>
	<tr>
	<td>Dropout</td>
	<td>Randomly drops neurons (p=0.5 typical)</td>
	<td><strong>Most effective for deep networks</strong></td>
	</tr>
	<tr>
	<td>Early Stopping</td>
	<td>Stop when validation loss increases</td>
	<td>Prevents overfitting during training</td>
	</tr>
	<tr>
	<td>Data Augmentation</td>
	<td>Artificially expand dataset</td>
	<td>Computer vision (rotations, flips, crops)</td>
	</tr>
	</table>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🎯 Best Practices</div>
	<div class="box-content">
	• Start with Dropout (0.5) for hidden layers<br>
	• Add L2 if still overfitting (λ=0.01, 0.001)<br>
	• Always use Early Stopping<br>
	• Data Augmentation for images
	</div>
	</div>
	`
	},
	"batch-norm": {
	overview: `
	<h3>Batch Normalization</h3>
	<p>Normalizes layer inputs to have mean=0 and variance=1, stabilizing and accelerating training.</p>

	<div class="callout tip">
	<div class="callout-title">✅ Benefits</div>
	• <strong>Faster Training:</strong> Allows higher learning rates<br>
	• <strong>Reduces Vanishing Gradients:</strong> Better gradient flow<br>
	• <strong>Regularization Effect:</strong> Adds slight noise<br>
	• <strong>Less Sensitive to Init:</strong> Reduces initialization impact
	</div>
	`,
	math: `
	<h3>The 4 Steps of Batch Normalization</h3>
	<p>Calculated per mini-batch B = {x₁, ..., xₘ}:</p>

	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Mini-Batch Mean:</strong> μ_B = (1/m) Σ xᵢ</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Mini-Batch Variance:</strong> σ²_B = (1/m) Σ (xᵢ - μ_B)²</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Normalize:</strong> x̂ᵢ = (xᵢ - μ_B) / √(σ²_B + ε)</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Scale and Shift:</strong> yᵢ = γ x̂ᵢ + β</div>
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Why γ and β?</div>
	If we only normalized to (0,1), we might restrict the representation power of the network. <br>
	γ and β allow the network to <strong>undo</strong> the normalization if that's optimal: <br>
	If γ = √(σ²) and β = μ, we get the original data back!
	</div>
	`
	},
	"cv-intro": {
	overview: `
	<h3>Why Computer Vision Needs Special Architectures</h3>
	<p><strong>Problem:</strong> Images have huge dimensionality</p>
	<ul>
	<li>224×224 RGB image = 150,528 input features</li>
	<li>Fully connected layer with 1000 neurons = 150M parameters!</li>
	<li>Result: Overfitting, slow training, memory issues</li>
	</ul>

	<h3>Solution: Convolutional Neural Networks</h3>
	<ul>
	<li><strong>Weight Sharing:</strong> Same filter applied everywhere (1000x fewer parameters)</li>
	<li><strong>Local Connectivity:</strong> Neurons see small patches</li>
	<li><strong>Translation Invariance:</strong> Detect cat anywhere in image</li>
	</ul>
	`,
	concepts: `
	<h3>Why CNNs Beat Fully Connected</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Parameter Efficiency:</strong> 1000× fewer parameters through weight sharing</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Translation Equivariance:</strong> Same object → same activation regardless of position</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">📸 Real-World CV</div>
	<div class="box-content">
	Face ID, medical imaging (MRI/CT), autonomous drone navigation, manufacturing defect detection, and satellite imagery analysis
	</div>
	</div>
	`,
	math: `
	<h3>The Parameter Explosion Problem</h3>
	<p>Why do standard Neural Networks fail on images? Let's calculate the parameters for a small image.</p>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: MLP vs Images</div>
	1. Input: 224 × 224 pixels with 3 color channels (RGB)<br>
	2. Input Size: 224 × 224 × 3 = <strong>150,528 features</strong><br>
	3. Hidden Layer: Suppose we want just 1000 neurons.<br>
	4. Matrix size: [1000, 150528]<br>
	5. Total Weights: 1000 × 150528 ≈ <strong>150 Million parameters</strong> for just ONE layer!
	</div>

	<h3>The CNN Solution: Weight Sharing</h3>
	<p>Instead of every neuron looking at every pixel, we use <strong>translation invariance</strong>. If an edge detector works in the top-left, it should work in the bottom-right.</p>

	<div class="formula">
	Total Params = (Kernel_H × Kernel_W × Input_Channels) × Num_Filters<br>
	<br>
	For a 3x3 filter: (3 × 3 × 3) × 64 = <strong>1,728 parameters</strong><br>
	Reduction: 150M / 1.7k ≈ <strong>86,000× more efficient!</strong>
	</div>
	`
	},
	"pooling": {
	overview: `
	<h3>Pooling Layers</h3>
	<p>Pooling reduces spatial dimensions while retaining important information.</p>

	<table>
	<tr>
	<th>Type</th>
	<th>Operation</th>
	<th>Use Case</th>
	</tr>
	<tr>
	<td>Max Pooling</td>
	<td>Take maximum value</td>
	<td><strong>Most common</strong> - preserves strong activations</td>
	</tr>
	<tr>
	<td>Average Pooling</td>
	<td>Take average</td>
	<td>Smoother, less common (used in final layers)</td>
	</tr>
	<tr>
	<td>Global Pooling</td>
	<td>Pool entire feature map</td>
	<td>Replace FC layers (reduces parameters)</td>
	</tr>
	</table>

	<div class="callout tip">
	<div class="callout-title">✅ Benefits</div>
	• Reduces spatial size (faster computation)<br>
	• Adds translation invariance<br>
	• Prevents overfitting<br>
	• Typical: 2×2 window, stride 2 (halves dimensions)
	</div>
	`,
	concepts: `
	<h3>Pooling Mechanics</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Downsampling:</strong> Reduces H×W by pooling factor (typically 2×)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>No Learnable Parameters:</strong> Fixed operation (max/average)</div>
	</div>
	<div class="formula">
	Example: 4×4 input → 2×2 max pooling → 2×2 output
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🎯 Standard CNN Component</div>
	<div class="box-content">
	Used after conv layers in AlexNet, VGG, and most classic CNNs to progressively reduce spatial dimensions
	</div>
	</div>
	`,
	math: `
	<h3>Max Pooling: Winning Signal Selection</h3>
	<p>Pooling operations are non-parametric (no weights). They simply select or average values within a local window.</p>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: 2x2 Max Pooling</div>
	Input (4x4):<br>
	[1 3 \| 2 1]<br>
	[5 1 \| 0 2]<br>
	-----------<br>
	[1 1 \| 8 2]<br>
	[0 2 \| 4 1]<br>
	<br>
	Output (2x2):<br>
	Step 1: max(1, 3, 5, 1) = <strong>5</strong><br>
	Step 2: max(2, 1, 0, 2) = <strong>2</strong><br>
	Step 3: max(1, 1, 0, 2) = <strong>2</strong><br>
	Step 4: max(8, 2, 4, 1) = <strong>8</strong><br>
	Final: [5 2] / [2 8]
	</div>

	<h3>Backprop through Pooling</h3>
	<div class="list-item">
	<div class="list-num">💡</div>
	<div><strong>Max Pooling:</strong> Gradient is routed ONLY to the neuron that had the maximum value. All others get 0.</div>
	</div>
	<div class="list-item">
	<div class="list-num">💡</div>
	<div><strong>Average Pooling:</strong> Gradient is distributed evenly among all neurons in the window.</div>
	</div>
	`
	},
	"cnn-basics": {
	overview: `
	<h3>CNN Architecture Pattern</h3>
	<div class="formula">
	Input → [Conv → ReLU → Pool] × N → Flatten → FC → Softmax
	</div>

	<h3>Typical Layering Strategy</h3>
	<ul>
	<li><strong>Early Layers:</strong> Detect low-level features (edges, textures) - small filters (3×3)</li>
	<li><strong>Middle Layers:</strong> Combine into patterns, parts - more filters, same size</li>
	<li><strong>Deep Layers:</strong> High-level concepts (faces, objects) - many filters</li>
	<li><strong>Final FC Layers:</strong> Classification based on learned features</li>
	</ul>

	<div class="callout insight">
	<div class="callout-title">💡 Filter Progression</div>
	Layer 1: 32 filters (edges)<br>
	Layer 2: 64 filters (textures)<br>
	Layer 3: 128 filters (patterns)<br>
	Layer 4: 256 filters (parts)<br>
	Common pattern: double filters after each pooling
	</div>
	`,
	concepts: `
	<h3>Module Design Principles</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Spatial Reduction:</strong> Progressively downsample (224→112→56→28...)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Channel Expansion:</strong> Increase filters as spatial dims decrease</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🎯 All Modern Vision Models</div>
	<div class="box-content">
	This pattern forms the backbone of ResNet, MobileNet, EfficientNet - fundamental CNN design
	</div>
	</div>
	`,
	math: `
	<h3>1. The Golden Formula for Output Size</h3>
	<p>Given Input (W), Filter Size (F), Padding (P), and Stride (S):</p>
	<div class="formula" style="font-size: 1.2rem; text-align: center; margin: 20px 0;">
	Output Size = ⌊(W - F + 2P) / S⌋ + 1
	</div>

	<h3>2. Parameter Count Calculation</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Parameters PER Filter:</strong> (F × F × C_in) + 1 (bias)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Total Parameters:</strong> N_filters × ((F × F × C_in) + 1)</div>
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain Calculation</div>
	<strong>Input:</strong> 224x224x3 \| <strong>Layer:</strong> 64 filters of 3x3 \| <strong>Stride:</strong> 1 \| <strong>Padding:</strong> 1<br>
	1. <strong>Output Size:</strong> (224 - 3 + 2(1))/1 + 1 = 224 (Same Padding)<br>
	2. <strong>Params:</strong> 64 * (3 * 3 * 3 + 1) = 64 * 28 = <strong>1,792 parameters</strong><br>
	3. <strong>FLOPs:</strong> 224 * 224 * 1792 ≈ <strong>90 Million operations</strong> per image!
	</div>
	`
	},
	"viz-filters": {
	overview: `
	<h3>What CNNs Learn</h3>
	<p>CNN filters automatically learn hierarchical visual features:</p>

	<h3>Layer-by-Layer Visualization</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Layer 1:</strong> Edges and colors (horizontal, vertical, diagonal lines)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Layer 2:</strong> Textures and patterns (corners, curves, simple shapes)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Layer 3:</strong> Object parts (eyes, wheels, windows)</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Layer 4-5:</strong> Whole objects (faces, cars, animals)</div>
	</div>
	`,
	concepts: `
	<h3>Visualization Techniques</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Activation Maximization:</strong> Find input that maximizes filter response</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Grad-CAM:</strong> Highlight important regions for predictions</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🔍 Model Interpretability</div>
	<div class="box-content">
	Understanding what CNNs learn helps debug failures, build trust, and improve architecture design
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🎨 Art & Style Transfer</div>
	<div class="box-content">
	Filter visualizations inspired neural style transfer (VGG features)
	</div>
	</div>
	`
	},
	"lenet": {
	overview: `
	<h3>LeNet-5 (1998) - The Pioneer</h3>
	<p>First successful CNN for digit recognition (MNIST). Introduced the Conv → Pool → Conv → Pool pattern still used today.</p>

	<h3>Architecture</h3>
	<div class="formula">
	Input 32×32 → Conv(6 filters, 5×5) → AvgPool → Conv(16 filters, 5×5) → AvgPool → FC(120) → FC(84)→ FC(10)
	</div>

	<div class="callout insight">
	<div class="callout-title">🏆 Historical Impact</div>
	• Used by US Postal Service for zip code recognition<br>
	• Proved CNNs work for real-world tasks<br>
	• Template for modern architectures
	</div>
	`,
	concepts: `
	<h3>Key Innovations</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Layered Architecture:</strong> Hierarchical feature extraction</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Shared Weights:</strong> Convolutional parameter sharing</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">✉️ Handwriting Recognition</div>
	<div class="box-content">
	USPS mail sorting, check processing, form digitization
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">📚 Educational Foundation</div>
	<div class="box-content">
	Perfect starting point for learning CNNs - simple enough to understand, complex enough to be useful
	</div>
	</div>
	`
	},
	"alexnet": {
	overview: `
	<h3>AlexNet (2012) - The Deep Learning Revolution</h3>
	<p>Won ImageNet 2012 by huge margin (15.3% vs 26.2% error), igniting the deep learning revolution.</p>

	<h3>Key Innovations</h3>
	<ul>
	<li><strong>ReLU Activation:</strong> Faster training than sigmoid/tanh</li>
	<li><strong>Dropout:</strong> Prevents overfitting (p=0.5)</li>
	<li><strong>Data Augmentation:</strong> Random crops/flips</li>
	<li><strong>GPU Training:</strong> Used 2 GTX580 GPUs</li>
	<li><strong>Deep:</strong> 8 layers (5 conv + 3 FC), 60M parameters</li>
	</ul>

	<div class="callout tip">
	<div class="callout-title">💡 Why So Important?</div>
	First to show that deeper networks + more data + GPU compute = breakthrough performance
	</div>
	`,
	concepts: `
	<h3>Technical Contributions</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>ReLU:</strong> Solved vanishing gradients, enabled deeper networks</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Dropout:</strong> First major regularization for deep nets</div>
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🎯 ImageNet Challenge</div>
	<div class="box-content">
	Shattered records on 1000-class classification, proving deep learning superiority
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🚀 Industry Catalyst</div>
	<div class="box-content">
	Sparked AI renaissance - Google, Facebook, Microsoft pivoted to deep learning after AlexNet
	</div>
	</div>
	`,
	math: `
	<h3>Paper & Pain: Parameter Counting</h3>
	<p>Understanding AlexNet's 60M parameters:</p>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Conv Layers:</strong> Only ~2.3 Million parameters. They do most of the work with small memory!</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>FC Layers:</strong> Over 58 Million parameters. The first FC layer (FC6) takes 4096 * (66256) ≈ 37M params!</div>
	</div>
	<div class="callout warning">
	<div class="callout-title">⚠️ The Design Flaw</div>
	FC layers are the memory bottleneck. Modern models (ResNet, Inception) replace these with Global Average Pooling to save 90% parameters.
	</div>
	`
	},
	"vgg": {
	overview: `
	<h3>VGGNet (2014) - The Power of Depth</h3>
	<p>VGG showed that depth matters - 16-19 layers using only small 3×3 filters.</p>
	`,
	concepts: `
	<h3>Small Filters, Receptive Field</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Uniformity:</strong> Uses 3×3 filters everywhere with stride 1, padding 1.</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Pooling Pattern:</strong> 2×2 max pooling after every 2-3 conv layers.</div>
	</div>
	`,
	math: `
	<h3>The 5×5 vs 3×3+3×3 Equivalence</h3>
	<p>Why stack 3x3 filters instead of one large filter?</p>
	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Paramount Efficiency</div>
	1. Receptive Field: Two 3x3 layers cover 5x5 area. Three 3x3 layers cover 7x7 area.<br>
	2. Param Count (C filters):<br>
	• One 7x7 layer: 7² × C² = 49C² parameters.<br>
	• Three 3x3 layers: 3 × (3² × C²) = 27C² parameters.<br>
	Result: 45% reduction in weights for the SAME "view" of the image!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🖼️ Feature Backbone</div>
	VGG is the preferred architectural backbone for Neural Style Transfer and early GANs due to its simple, clean feature extraction properties.
	</div>
	`
	},
	"resnet": {
	overview: `
	<h3>ResNet (2015) - Residual Connections</h3>
	<p><strong>Problem:</strong> Very deep networks (>20 layers) had degradation - training accuracy got worse!</p>

	<h3>Solution: Skip Connections</h3>
	<div class="formula">
	Instead of learning H(x), learn residual F(x) = H(x) - x<br>
	Output: y = F(x) + x (shortcut connection)
	</div>

	<h3>Why Skip Connections Work</h3>
	<ul>
	<li><strong>Gradient Flow:</strong> Gradients flow directly through shortcuts</li>
	<li><strong>Identity Mapping:</strong> Easy to learn identity (just set F(x)=0)</li>
	<li><strong>Feature Reuse:</strong> Earlier features directly available to later layers</li>
	</ul>

	<div class="callout tip">
	<div class="callout-title">🏆 Impact</div>
	• Enabled training of 152-layer networks (even 1000+ layers)<br>
	• Won ImageNet 2015<br>
	• Skip connections now used everywhere (U-Net, Transformers, etc.)
	</div>
	`,
	concepts: `
	<h3>Identity & Projection Shortcuts</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Identity Shortcut:</strong> Used when dimensions match. y = F(x, {W}) + x</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Projection Shortcut (1×1 Conv):</strong> Used when dimensions change. y = F(x, {W}) + W_s x</div>
	</div>
	`,
	math: `
	<h3>The Vanishing Gradient Solution</h3>
	<p>Why do skip connections help? Let's differentiate the output y = F(x) + x:</p>
	<div class="formula">
	∂y/∂x = ∂F/∂x + 1
	</div>
	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Gradient Flow</div>
	The "+1" term acts as a gradient highway. Even if the weights in F(x) are small (causing ∂F/∂x → 0), the gradient can still flow through the +1 term. <br>
	This prevents the gradient from vanishing even in networks with 1000+ layers!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🏗️ Modern Vision Backbones</div>
	<div class="box-content">ResNet is the default starting point for nearly all computer vision tasks today (Mask R-CNN, YOLO, etc.).</div>
	</div>
	`
	},
	"inception": {
	overview: `
	<h3>Inception/GoogLeNet (2014) - Going Wider</h3>
	<p>Instead of going deeper, Inception modules go wider - using multiple filter sizes in parallel.</p>

	<h3>Inception Module</h3>
	<div class="formula">
	Input → [1×1 conv] ⊕ [3×3 conv] ⊕ [5×5 conv] ⊕ [3×3 pool] → Concatenate
	</div>
	`,
	concepts: `
	<h3>Core Innovations</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>1×1 Bottlenecks:</strong> Dimensionality reduction before expensive convolutions.</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Auxiliary Classifiers:</strong> Used during training to combat gradient vanishing in middle layers.</div>
	</div>
	`,
	math: `
	<h3>1×1 Convolution Math (Network-in-Network)</h3>
	<p>A 1×1 convolution acts like a channel-wise MLP. It maps input channels C to output channels C' using 1×1×C parameters per filter.</p>
	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Compression</div>
	Input: 28x28x256 \| Target: 28x28x512 with 3x3 Filters.<br>
	Direct: 512 * (33256) ≈ 1.1 Million params.<br>
	Inception (1x1 bottleneck to 64):<br>
	Step 1 (1x1): 64 * (11256) = 16k params.<br>
	Step 2 (3x3): 512 * (3364) = 294k params.<br>
	Total: 310k params. ~3.5× reduction in parameters!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🏎️ Computational Efficiency</div>
	Inception designs are optimized for running deep networks on limited compute budgets.
	</div>
	`
	},
	"mobilenet": {
	overview: `
	<h3>MobileNet - CNNs for Mobile Devices</h3>
	<p>Designed for mobile/embedded vision using depthwise separable convolutions.</p>

	<h3>Depthwise Separable Convolution</h3>
	<div class="formula">
	Standard Conv = Depthwise Conv + Pointwise (1×1) Conv
	</div>

	<h3>Computation Reduction</h3>
	<table>
	<tr>
	<th>Method</th>
	<th>Parameters</th>
	<th>FLOPs</th>
	</tr>
	<tr>
	<td>Standard 3×3 Conv</td>
	<td>3×3×C_in×C_out</td>
	<td>High</td>
	</tr>
	<tr>
	<td>Depthwise Separable</td>
	<td>3×3×C_in + C_in×C_out</td>
	<td><strong>8-9× less!</strong></td>
	</tr>
	</table>

	<div class="callout tip">
	<div class="callout-title">✅ Applications</div>
	• Real-time mobile apps (camera filters, AR)<br>
	• Edge devices (drones, IoT)<br>
	• Latency-critical systems<br>
	• Good accuracy with 10-20× speedup
	</div>
	`,
	concepts: `
	<h3>Efficiency Factors</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Width Multiplier (α):</strong> Thins the network by reducing channels.</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Resolution Multiplier (ρ):</strong> Reduces input image size.</div>
	</div>
	`,
	math: `
	<h3>Depthwise Separable Math</h3>
	<p>Standard convolution complexity: F² × C_in × C_out × H × W</p>
	<p>Separable complexity: (F² × C_in + C_in × C_out) × H × W</p>
	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: The 9× Speedup</div>
	Reduction ratio is roughly: 1/C_out + 1/F². <br>
	For 3x3 filters (F=3): Reduction is roughly 1/9th the computation of standard conv!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">📱 Edge Devices</div>
	<div class="box-content">Real-time object detection on smartphones, web browsers (TensorFlow.js), and IoT devices.</div>
	</div>
	`
	},
	"transfer-learning": {
	overview: `
	<h3>Transfer Learning - Don't Train from Scratch!</h3>
	<p>Use pre-trained models (ImageNet) as feature extractors for your custom task.</p>

	<h3>Two Strategies</h3>
	<table>
	<tr>
	<th>Approach</th>
	<th>When to Use</th>
	<th>How</th>
	</tr>
	<tr>
	<td>Feature Extraction</td>
	<td><strong>Small dataset</strong> (<10K images)</td>
	<td>Freeze all layers, train only final FC layer</td>
	</tr>
	<tr>
	<td>Fine-tuning</td>
	<td><strong>Medium dataset</strong> (10K-100K)</td>
	<td>Freeze early layers, train last few + FC</td>
	</tr>
	<tr>
	<td>Full Training</td>
	<td><strong>Large dataset</strong> (>1M images)</td>
	<td>Use pre-trained as initialization, train all</td>
	</tr>
	</table>

	<div class="callout tip">
	<div class="callout-title">💡 Best Practices</div>
	• Use pre-trained models when dataset < 100K images<br>
	• Start with low learning rate (1e-4) for fine-tuning<br>
	• Popular backbones: ResNet50, EfficientNet, ViT
	</div>
	`,
	concepts: `
	<h3>Why Transfer Learning Works</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Feature Hierarchy:</strong> Early layers learn universal features (edges, textures) that transfer across domains</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Domain Similarity:</strong> The more similar source and target domains, the better transfer</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Regularization Effect:</strong> Pre-trained weights act as strong priors, preventing overfitting</div>
	</div>

	<h3>Transfer Learning Quadrant</h3>
	<table>
	<tr>
	<th></th>
	<th>Similar Domain</th>
	<th>Different Domain</th>
	</tr>
	<tr>
	<td><strong>Large Data</strong></td>
	<td>Fine-tune all layers</td>
	<td>Fine-tune top layers</td>
	</tr>
	<tr>
	<td><strong>Small Data</strong></td>
	<td>Feature extraction</td>
	<td>Feature extraction (risky)</td>
	</tr>
	</table>
	`,
	math: `
	<h3>Learning Rate Strategies</h3>
	<p>Different layers need different learning rates during fine-tuning.</p>

	<div class="formula">
	Discriminative Fine-tuning:<br>
	lr_layer_n = lr_base × decay^(L-n)<br>
	<br>
	Where L = total layers, n = layer index<br>
	Example: lr_base=1e-3, decay=0.9<br>
	Layer 1: 1e-3 × 0.9^9 ≈ 3.9e-4<br>
	Layer 10: 1e-3 × 0.9^0 = 1e-3
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Domain Shift</div>
	When source and target distributions differ:<br>
	• <strong>Covariate Shift:</strong> P(X) changes, P(Y\|X) same<br>
	• <strong>Label Shift:</strong> P(Y) changes, P(X\|Y) same<br>
	• <strong>Concept Shift:</strong> P(Y\|X) changes<br>
	Transfer learning handles covariate shift well but struggles with concept shift.
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🏥 Medical Imaging</div>
	<div class="box-content">
	Train on ImageNet, fine-tune for X-ray diagnosis with only 1000 labeled images. Achieves 90%+ accuracy vs 60% from scratch.
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🛒 Retail & E-commerce</div>
	<div class="box-content">
	Product classification, visual search, inventory management using pre-trained ResNet/EfficientNet models.
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">🌍 Satellite Imagery</div>
	<div class="box-content">
	Land use classification, deforestation detection, urban planning using models pre-trained on aerial imagery.
	</div>
	</div>
	`
	},
	"localization": {
	overview: `
	<h3>Object Localization</h3>
	<p>Predict both class and bounding box for a single object in image.</p>

	<h3>Multi-Task Loss</h3>
	<div class="formula">
	Total Loss = L_classification + λ × L_bbox<br>
	<br>
	Where:<br>
	L_classification = Cross-Entropy<br>
	L_bbox = Smooth L1 or IoU loss<br>
	λ = balance term (typically 1-10)
	</div>

	<h3>Bounding Box Representation</h3>
	<ul>
	<li><strong>Option 1:</strong> (x_min, y_min, x_max, y_max)</li>
	<li><strong>Option 2:</strong> (x_center, y_center, width, height) ← Most common</li>
	</ul>
	`,
	concepts: `
	<h3>Localization vs Detection</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Classification:</strong> What is in the image? → "Cat"</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Localization:</strong> Where is the single object? → "Cat at [100, 50, 200, 150]"</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Detection:</strong> Where are ALL objects? → Multiple bounding boxes</div>
	</div>

	<h3>Network Architecture</h3>
	<p>Modify a classification network (ResNet, VGG) by adding a regression head:</p>
	<div class="formula">
	CNN Backbone → Feature Map → [Classification Head (1000 classes)]<br>
	→ [Regression Head (4 coordinates)]
	</div>
	`,
	math: `
	<h3>Smooth L1 Loss (Huber Loss)</h3>
	<p>Combines L1 and L2 loss for robust bounding box regression.</p>

	<div class="formula">
	SmoothL1(x) = { 0.5x² if \|x\| < 1<br>
	{ \|x\| - 0.5 otherwise
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Why Smooth L1?</div>
	• <strong>L2 Loss:</strong> Penalizes large errors too much (squared), sensitive to outliers<br>
	• <strong>L1 Loss:</strong> Robust to outliers but has discontinuous gradient at 0<br>
	• <strong>Smooth L1:</strong> Best of both worlds - quadratic near 0, linear for large errors
	</div>

	<h3>IoU Loss</h3>
	<div class="formula">
	L_IoU = 1 - IoU(pred, target)<br>
	Where IoU = Intersection / Union
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🚗 Self-Driving Cars</div>
	<div class="box-content">Localize the primary vehicle ahead for adaptive cruise control</div>
	</div>
	<div class="info-box">
	<div class="box-title">📸 Photo Auto-Crop</div>
	<div class="box-content">Detect main subject and automatically crop to optimal composition</div>
	</div>
	<div class="info-box">
	<div class="box-title">🏥 Medical Imaging</div>
	<div class="box-content">Localize tumors, organs, or anomalies in X-rays and CT scans</div>
	</div>
	`
	},
	"rcnn": {
	overview: `
	<h3>R-CNN Family Evolution</h3>
	<table>
	<tr>
	<th>Model</th>
	<th>Year</th>
	<th>Speed (FPS)</th>
	<th>Key Innovation</th>
	</tr>
	<tr>
	<td>R-CNN</td>
	<td>2014</td>
	<td>0.05</td>
	<td>Selective Search + CNN features</td>
	</tr>
	<tr>
	<td>Fast R-CNN</td>
	<td>2015</td>
	<td>0.5</td>
	<td>RoI Pooling (share conv features)</td>
	</tr>
	<tr>
	<td>Faster R-CNN</td>
	<td>2015</td>
	<td>7</td>
	<td>Region Proposal Network (RPN)</td>
	</tr>
	<tr>
	<td>Mask R-CNN</td>
	<td>2017</td>
	<td>5</td>
	<td>+ Instance Segmentation masks</td>
	</tr>
	</table>

	<div class="callout tip">
	<div class="callout-title">💡 When to Use</div>
	Faster R-CNN: Best accuracy for detection (not real-time)<br>
	Mask R-CNN: Detection + instance segmentation
	</div>
	`,
	concepts: `
	<h3>Two-Stage Detection Pipeline</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Stage 1 - Region Proposal:</strong> Find ~2000 candidate regions that might contain objects</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Stage 2 - Classification:</strong> Classify each region and refine bounding box</div>
	</div>

	<h3>Region Proposal Network (RPN)</h3>
	<p>The key innovation of Faster R-CNN - learns to propose regions instead of using hand-crafted algorithms.</p>
	<div class="formula">
	RPN Output per location:<br>
	• k anchor boxes × 4 coordinates = 4k regression outputs<br>
	• k anchor boxes × 2 objectness scores = 2k classification outputs<br>
	Typical k = 9 (3 scales × 3 aspect ratios)
	</div>
	`,
	math: `
	<h3>RoI Pooling: Fixed-Size Feature Maps</h3>
	<p>Convert variable-size regions into fixed 7×7 feature maps for FC layers.</p>

	<div class="formula">
	For each RoI of size H×W:<br>
	1. Divide into 7×7 grid (cells of size H/7 × W/7)<br>
	2. Max-pool each cell → single value<br>
	3. Output: Fixed 7×7 feature map regardless of input size
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: RoI Align vs RoI Pool</div>
	<strong>Problem:</strong> RoI Pooling quantizes coordinates, causing misalignment.<br>
	<strong>Solution:</strong> RoI Align uses bilinear interpolation instead of rounding.<br>
	This is critical for Mask R-CNN where pixel-level accuracy matters!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🏥 Medical Imaging</div>
	<div class="box-content">High-accuracy tumor detection where speed is less critical than precision</div>
	</div>
	<div class="info-box">
	<div class="box-title">📷 Photo Analysis</div>
	<div class="box-content">Face detection, scene understanding, object counting in static images</div>
	</div>
	<div class="info-box">
	<div class="box-title">🔬 Scientific Research</div>
	<div class="box-content">Cell detection, particle tracking, microscopy image analysis</div>
	</div>
	`
	},
	"ssd": {
	overview: `
	<h3>SSD (Single Shot MultiBox Detector)</h3>
	<p>Balances speed and accuracy by predicting boxes at multiple scales.</p>

	<h3>Key Ideas</h3>
	<ul>
	<li><strong>Multi-Scale:</strong> Predictions from different layers (early = small objects, deep = large)</li>
	<li><strong>Default Boxes (Anchors):</strong> Pre-defined boxes of various aspects ratios</li>
	<li><strong>Single Pass:</strong> No separate region proposal step</li>
	</ul>

	<div class="callout insight">
	<div class="callout-title">📊 Performance</div>
	SSD300: 59 FPS, 74.3% mAP<br>
	SSD512: 22 FPS, 76.8% mAP<br>
	<br>
	Sweet spot between YOLO (faster) and Faster R-CNN (more accurate)
	</div>
	`,
	concepts: `
	<h3>Multi-Scale Feature Maps</h3>
	<p>SSD makes predictions at multiple layers, each detecting objects at different scales.</p>

	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Early Layers (38×38):</strong> Detect small objects (high resolution)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Middle Layers (19×19, 10×10):</strong> Detect medium objects</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Deep Layers (5×5, 3×3, 1×1):</strong> Detect large objects</div>
	</div>

	<h3>Default Boxes (Anchors)</h3>
	<p>At each feature map cell, SSD predicts offsets for k default boxes with different aspect ratios (1:1, 2:1, 1:2, 3:1, 1:3).</p>
	`,
	math: `
	<h3>SSD Loss Function</h3>
	<p>Weighted sum of localization and confidence losses.</p>

	<div class="formula">
	L(x, c, l, g) = (1/N) × [L_conf(x, c) + α × L_loc(x, l, g)]<br>
	<br>
	Where:<br>
	• L_conf = Softmax loss over class confidences<br>
	• L_loc = Smooth L1 loss over box coordinates<br>
	• α = Weight factor (typically 1)<br>
	• N = Number of matched default boxes
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Hard Negative Mining</div>
	Problem: Most default boxes are background (class imbalance).<br>
	Solution: Sort negative boxes by confidence loss, pick top ones so pos:neg = 1:3.<br>
	This focuses training on hard negatives, not easy ones.
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">📹 Video Analytics</div>
	<div class="box-content">Real-time object detection in security cameras, sports broadcasting</div>
	</div>
	<div class="info-box">
	<div class="box-title">🤖 Robotics</div>
	<div class="box-content">Object detection for manipulation tasks, obstacle avoidance</div>
	</div>
	<div class="info-box">
	<div class="box-title">📱 Mobile Apps</div>
	<div class="box-content">Lightweight models for on-device detection (MobileNet-SSD)</div>
	</div>
	`
	},
	"semantic-seg": {
	overview: `
	<h3>Semantic Segmentation</h3>
	<p>Classify every pixel in the image (pixel-wise classification).</p>

	<h3>Popular Architectures</h3>
	<table>
	<tr>
	<th>Model</th>
	<th>Key Feature</th>
	</tr>
	<tr>
	<td>FCN</td>
	<td>Fully Convolutional (no FC layers)</td>
	</tr>
	<tr>
	<td>U-Net</td>
	<td>Skip connections from encoder to decoder</td>
	</tr>
	<tr>
	<td>DeepLab</td>
	<td>Atrous (dilated) convolutions + ASPP</td>
	</tr>
	</table>

	<div class="formula">
	U-Net Pattern:<br>
	Input → Encoder (downsample) → Bottleneck → Decoder (upsample) → Pixel-wise Output<br>
	With skip connections from encoder to decoder at each level
	</div>
	`,
	concepts: `
	<h3>Key Concepts</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Encoder-Decoder:</strong> Downsample to capture context, upsample to recover spatial detail</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Skip Connections:</strong> Pass high-resolution features from encoder to decoder (U-Net)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Atrous Convolution:</strong> Expand receptive field without losing resolution (DeepLab)</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>ASPP:</strong> Atrous Spatial Pyramid Pooling - capture multi-scale context</div>
	</div>
	`,
	math: `
	<h3>Dice Loss for Segmentation</h3>
	<p>Better than cross-entropy for imbalanced classes (small objects).</p>

	<div class="formula">
	Dice = 2 × \|A ∩ B\| / (\|A\| + \|B\|)<br>
	Dice Loss = 1 - Dice<br>
	<br>
	Where A = predicted mask, B = ground truth mask
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Why Dice > Cross-Entropy?</div>
	If only 1% of pixels are foreground:<br>
	• Cross-Entropy: Model can get 99% accuracy by predicting all background!<br>
	• Dice: Penalizes missed foreground pixels heavily<br>
	• Often use combination: L = BCE + Dice
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🏥 Medical Imaging</div>
	<div class="box-content">Tumor segmentation, organ delineation, cell analysis</div>
	</div>
	<div class="info-box">
	<div class="box-title">🚗 Autonomous Driving</div>
	<div class="box-content">Road segmentation, free space detection, drivable area</div>
	</div>
	`
	},
	"instance-seg": {
	overview: `
	<h3>Instance Segmentation</h3>
	<p>Detect AND segment each individual object (combines object detection + semantic segmentation).</p>

	<h3>Difference from Semantic Segmentation</h3>
	<ul>
	<li><strong>Semantic:</strong> All "person" pixels get same label</li>
	<li><strong>Instance:</strong> Person #1, Person #2, Person #3 (separate instances)</li>
	</ul>

	<h3>Main Approach: Mask R-CNN</h3>
	<div class="formula">
	Faster R-CNN + Segmentation Branch<br>
	<br>
	For each RoI:<br>
	1. Bounding box regression<br>
	2. Class prediction<br>
	3. <strong>Binary mask for the object</strong>
	</div>
	`,
	concepts: `
	<h3>Mask R-CNN Architecture</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Backbone:</strong> ResNet-50/101 with Feature Pyramid Network (FPN)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>RPN:</strong> Region Proposal Network (same as Faster R-CNN)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>RoI Align:</strong> Better than RoI Pooling (no quantization)</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Mask Head:</strong> Small FCN that outputs 28×28 binary mask per class</div>
	</div>
	`,
	math: `
	<h3>Multi-Task Loss</h3>
	<p>Mask R-CNN optimizes three losses simultaneously:</p>

	<div class="formula">
	L = L_cls + L_box + L_mask<br>
	<br>
	Where:<br>
	• L_cls = Classification loss (cross-entropy)<br>
	• L_box = Bounding box regression (smooth L1)<br>
	• L_mask = Binary cross-entropy per-pixel mask loss
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Key Insight: Decoupled Masks</div>
	Mask R-CNN predicts a binary mask for EACH class independently.<br>
	This avoids competition between classes and improves accuracy.
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">📸 Photo Editing</div>
	<div class="box-content">Auto-select objects for editing, background removal, composition</div>
	</div>
	<div class="info-box">
	<div class="box-title">🤖 Robotics</div>
	<div class="box-content">Object manipulation - need exact shape, not just bounding box</div>
	</div>
	<div class="info-box">
	<div class="box-title">🎬 Video Production</div>
	<div class="box-content">Rotoscoping, VFX, green screen replacement</div>
	</div>
	`
	},
	"face-recog": {
	overview: `
	<h3>Face Recognition with Siamese Networks</h3>
	<p>Learn similarity between faces using metric learning instead of classification.</p>

	<h3>Triplet Loss Training</h3>
	<div class="formula">
	Loss = max(\|\|f(A) - f(P)\|\|² - \|\|f(A) - f(N)\|\|² + margin, 0)<br>
	<br>
	Where:<br>
	A = Anchor (reference face)<br>
	P = Positive (same person)<br>
	N = Negative (different person)<br>
	margin = minimum separation (e.g., 0.2)
	</div>

	<div class="callout tip">
	<div class="callout-title">💡 One-Shot Learning</div>
	After training, recognize new people with just 1-2 photos!<br>
	No retraining needed - just compare embeddings.
	</div>
	`,
	concepts: `
	<h3>Face Recognition Pipeline</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Face Detection:</strong> Find faces in image (MTCNN, RetinaFace)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Alignment:</strong> Normalize face orientation and scale</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Embedding:</strong> Extract 128/512-dim feature vector (FaceNet, ArcFace)</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Matching:</strong> Compare embeddings with cosine similarity or L2 distance</div>
	</div>

	<h3>Key Models</h3>
	<table>
	<tr><th>Model</th><th>Key Innovation</th></tr>
	<tr><td>FaceNet</td><td>Triplet loss, 128-dim embedding</td></tr>
	<tr><td>ArcFace</td><td>Additive angular margin loss, SOTA accuracy</td></tr>
	<tr><td>DeepFace</td><td>Facebook's early success</td></tr>
	</table>
	`,
	math: `
	<h3>Triplet Loss Intuition</h3>
	<p>Push same-person faces closer, different-person faces apart.</p>

	<div class="formula">
	\|\|f(A) - f(P)\|\|² + margin < \|\|f(A) - f(N)\|\|²
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Hard Triplet Mining</div>
	Easy triplets: Random selection - margin already satisfied, loss=0<br>
	Hard triplets: Find P closest to anchor, N closest to anchor from different class<br>
	<strong>Training on hard triplets is critical for convergence!</strong>
	</div>

	<h3>ArcFace Angular Margin</h3>
	<div class="formula">
	L = -log(e^(s·cos(θ + m)) / (e^(s·cos(θ + m)) + Σ e^(s·cos(θ_j))))<br>
	Where m = angular margin, s = scale factor
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">📱 Phone Unlock</div>
	<div class="box-content">Face ID, biometric authentication</div>
	</div>
	<div class="info-box">
	<div class="box-title">🔒 Security</div>
	<div class="box-content">Access control, surveillance, identity verification</div>
	</div>
	`
	},
	"autoencoders": {
	overview: `
	<h3>Autoencoders</h3>
	<p>Unsupervised learning to compress data into latent representation and reconstruct it.</p>

	<h3>Architecture</h3>
	<div class="formula">
	Input → Encoder → Latent Code (bottleneck) → Decoder → Reconstruction<br>
	<br>
	Loss = \|\|Input - Reconstruction\|\|² (MSE)
	</div>

	<h3>Variants</h3>
	<ul>
	<li><strong>Vanilla:</strong> Basic autoencoder</li>
	<li><strong>Denoising:</strong> Input corrupted, output clean (learns robust features)</li>
	<li><strong>Variational (VAE):</strong> Probabilistic latent space (for generation)</li>
	<li><strong>Sparse:</strong> Encourage sparse activations</li>
	</ul>
	`,
	concepts: `
	<h3>Key Concepts</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Bottleneck:</strong> Force information compression by using fewer dimensions than input</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Reconstruction:</strong> Learn to recreate input - captures essential features</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Latent Space:</strong> Compressed representation captures data structure</div>
	</div>

	<h3>Variational Autoencoder (VAE)</h3>
	<p>Instead of encoding to a point, encode to a probability distribution (mean + variance).</p>
	<div class="formula">
	Encoder outputs: μ (mean) and σ (standard deviation)<br>
	Sample: z = μ + σ × ε (where ε ~ N(0,1))<br>
	This is the "reparameterization trick" for backprop!
	</div>
	`,
	math: `
	<h3>VAE Loss Function (ELBO)</h3>
	<p>VAE maximizes the Evidence Lower Bound:</p>

	<div class="formula">
	L = E[log p(x\|z)] - KL(q(z\|x) \|\| p(z))<br>
	<br>
	Where:<br>
	• First term: Reconstruction quality<br>
	• Second term: KL divergence regularization (push q toward N(0,1))
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: KL Divergence</div>
	For Gaussians:<br>
	KL = -0.5 × Σ(1 + log(σ²) - μ² - σ²)<br>
	This has a closed-form solution - no sampling needed!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🗜️ Compression</div>
	<div class="box-content">Dimensionality reduction, data compression, feature extraction</div>
	</div>
	<div class="info-box">
	<div class="box-title">🔍 Anomaly Detection</div>
	<div class="box-content">High reconstruction error = anomaly (fraud detection, defect detection)</div>
	</div>
	`
	},
	"gans": {
	overview: `
	<h3>GANs (Generative Adversarial Networks)</h3>
	<p>Two networks compete: Generator creates fake data, Discriminator tries to detect fakes.</p>

	<h3>The GAN Game</h3>
	<div class="formula">
	Generator: Creates fake images from random noise<br>
	Goal: Fool discriminator<br>
	<br>
	Discriminator: Classifies real vs fake<br>
	Goal: Correctly identify fakes<br>
	<br>
	Minimax Loss:<br>
	min_G max_D E[log D(x)] + E[log(1 - D(G(z)))]
	</div>

	<div class="callout warning">
	<div class="callout-title">⚠️ Training Challenges</div>
	• Mode collapse (Generator produces limited variety)<br>
	• Training instability (careful tuning needed)<br>
	• Convergence issues<br>
	• Solutions: Wasserstein GAN, Spectral Normalization, StyleGAN improvements
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🎨 Image Generation</div>
	<div class="box-content">
	<strong>StyleGAN:</strong> Photorealistic faces, art generation<br>
	<strong>DCGAN:</strong> Bedroom images, object generation
	</div>
	</div>
	`,
	math: `
	<h3>The Minimax Game Objective</h3>
	<p>The original GAN objective from Ian Goodfellow (2014) is a zero-sum game between Discriminator (D) and Generator (G).</p>

	<div class="formula" style="font-size: 1.1rem; padding: 20px;">
	min_G max_D V(D, G) = E_x∼p_data[log D(x)] + E_z∼p_z[log(1 - D(G(z)))]
	</div>

	<h3>Paper & Pain: Finding the Optimal Discriminator</h3>
	<p>For a fixed Generator, the optimal Discriminator D* is:</p>
	<div class="formula">
	D*(x) = p_data(x) / (p_data(x) + p_g(x))
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Theoretical Insight</div>
	When the Discriminator is optimal, the Generator's task is essentially to minimize the <strong>Jensen-Shannon Divergence (JSD)</strong> between the data distribution and the model distribution. <br>
	<strong>Problem:</strong> JSD is "flat" when distributions don't overlap, leading to vanishing gradients. This is why <strong>Wasserstein GAN (WGAN)</strong> was invented—using Earth Mover's distance instead!
	</div>

	<h3>Generator Gradient Problem</h3>
	<p>Early in training, D(G(z)) is near 0. The term log(1-D(G(z))) has a very small gradient. </p>
	<div class="list-item">
	<div class="list-num">💡</div>
	<div><strong>Heuristic Fix:</strong> Instead of minimizing log(1-D(G(z))), we maximize <strong>log D(G(z))</strong>. This provides much stronger gradients early on!</div>
	</div>
	`
	},
	"diffusion": {
	overview: `
	<h3>Diffusion Models</h3>
	<p>Learn to reverse a gradual noising process, generating high-quality images.</p>

	<h3>How Diffusion Works</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Forward Process:</strong> Gradually add Gaussian noise over T steps (x₀ → x₁ → ... → x_T = pure noise)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Reverse Process:</strong> Train neural network to denoise (x_T → x_{T-1} → ... → x₀ = clean image)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Generation:</strong> Start from random noise, iteratively denoise T steps</div>
	</div>

	<div class="callout tip">
	<div class="callout-title">✅ Advantages over GANs</div>
	• More stable training (no adversarial dynamics)<br>
	• Better sample quality and diversity<br>
	• Mode coverage (no mode collapse)<br>
	• Controllable generation (text-to-image)
	</div>
	`,
	concepts: `
	<h3>Key Components</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>U-Net Backbone:</strong> Encoder-decoder with skip connections predicts noise at each step</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Time Embedding:</strong> Tell the model which timestep it's at (sinusoidal encoding)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>CLIP Conditioning:</strong> Guide generation with text embeddings (Stable Diffusion)</div>
	</div>

	<h3>Latent Diffusion</h3>
	<p>Instead of diffusing in pixel space (expensive), work in VAE latent space (8× smaller).</p>
	<div class="formula">
	Image (512×512×3) → VAE Encoder → Latent (64×64×4) → Diffuse → Decode
	</div>
	`,
	math: `
	<h3>Forward Process (Noising)</h3>
	<p>Add Gaussian noise according to a schedule β_t:</p>

	<div class="formula">
	q(x_t \| x_{t-1}) = N(x_t; √(1-β_t) × x_{t-1}, β_t × I)<br>
	<br>
	Or in closed form for any t:<br>
	x_t = √(ᾱ_t) × x_0 + √(1-ᾱ_t) × ε<br>
	Where ᾱ_t = Π_{s=1}^t (1-β_s)
	</div>

	<h3>Training Objective</h3>
	<p>Simple noise prediction loss:</p>
	<div class="formula">
	L = E[\|\|ε - ε_θ(x_t, t)\|\|²]
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Simplified Loss</div>
	The full variational bound is complex, but Ho et al. (2020) showed this simple MSE loss on noise prediction works just as well and is much easier to implement!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🖼️ Text-to-Image</div>
	<div class="box-content">
	<strong>Stable Diffusion:</strong> Open-source, runs on consumer GPUs<br>
	<strong>DALL-E 2:</strong> OpenAI's photorealistic generator<br>
	<strong>Midjourney:</strong> Artistic image generation
	</div>
	</div>
	`
	},
	"rnn": {
	overview: `
	<h3>RNNs & LSTMs</h3>
	<p>Process sequences by maintaining hidden state that captures past information.</p>

	<h3>The Vanishing Gradient Problem</h3>
	<p><strong>Problem:</strong> Standard RNNs can't learn long-term dependencies (gradients vanish over many time steps)</p>
	<p><strong>Solution:</strong> LSTM (Long Short-Term Memory) with gating mechanisms</p>

	<h3>LSTM Gates</h3>
	<ul>
	<li><strong>Forget Gate:</strong> What to remove from cell state</li>
	<li><strong>Input Gate:</strong> What new information to add</li>
	<li><strong>Output Gate:</strong> What to output as hidden state</li>
	</ul>

	<div class="callout warning">
	<div class="callout-title">⚠️ Limitation</div>
	Sequential processing (can't parallelize) - Transformers solved this!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">📝 Text Generation</div>
	<div class="box-content">Character-level generation, autocomplete (before Transformers)</div>
	</div>
	<div class="info-box">
	<div class="box-title">🎵 Time Series</div>
	<div class="box-content">Stock prediction, weather forecasting, music generation</div>
	</div>
	`,
	math: `
	<h3>RNN State Equations</h3>
	<p>Standard RNN processes a sequence x₁, x₂, ..., xₜ using a recurring hidden state hₜ.</p>

	<div class="formula">
	hₜ = tanh(Wₕₕhₜ₋₁ + Wₓₕxₜ + bₕ)<br>
	yₜ = Wₕᵧhₜ + bᵧ
	</div>

	<h3>Paper & Pain: The Vanishing Gradient Derivation</h3>
	<p>Why do RNNs fail on long sequences? Let's check the gradient ∂L/∂h₁:</p>
	<div class="formula">
	∂L/∂h₁ = (∂L/∂hₜ) × (∂hₜ/∂hₜ₋₁) × (∂hₜ₋₁/∂hₜ₋₂) × ... × (∂h₂/∂h₁)<br>
	<br>
	Where ∂hⱼ/∂hⱼ₋₁ = Wₕₕᵀ diag(tanh'(zⱼ))
	</div>
	<div class="callout warning">
	<div class="callout-title">⚠️ The Power Effect</div>
	If the largest eigenvalue of Wₕₕ < 1: Gradients <strong>shrink exponentially</strong> (0.9¹⁰⁰ ≈ 0.00002).<br>
	If > 1: Gradients <strong>explode</strong>.<br>
	<strong>LSTM Solution:</strong> The "Constant Error Carousel" (CEC) ensures gradients flow via the cell state without multiplication.
	</div>

	<h3>LSTM Gating Math</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div>Forget Gate: fₜ = σ(W_f[hₜ₋₁, xₜ] + b_f)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div>Input Gate: iₜ = σ(W_i[hₜ₋₁, xₜ] + b_i)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div>Cell State Update: cₜ = fₜcₜ₋₁ + iₜtanh(W_c[hₜ₋₁, xₜ] + b_c)</div>
	</div>
	`
	},
	"bert": {
	overview: `
	<h3>BERT (Bidirectional Encoder Representations from Transformers)</h3>
	<p>Pre-trained encoder-only Transformer for understanding language (not generation).</p>

	<h3>Key Innovation: Bidirectional Context</h3>
	<p>Unlike GPT (left-to-right), BERT sees both left AND right context simultaneously.</p>

	<h3>Pre-training Tasks</h3>
	<ul>
	<li><strong>Masked Language Modeling:</strong> Mask 15% of tokens, predict them (e.g., "The cat [MASK] on the mat" → predict "sat")</li>
	<li><strong>Next Sentence Prediction:</strong> Predict if sentence B follows A</li>
	</ul>

	<div class="callout tip">
	<div class="callout-title">💡 Fine-tuning BERT</div>
	1. Start with pre-trained BERT (trained on billions of words)<br>
	2. Add task-specific head (classification, QA, NER)<br>
	3. Fine-tune on your dataset (10K-100K examples)<br>
	4. Achieves SOTA with minimal data!
	</div>
	`,
	concepts: `
	<h3>BERT Architecture</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Encoder Only:</strong> 12/24 Transformer encoder layers (BERT-base/large)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Token Embedding:</strong> WordPiece tokenization (30K vocab)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Segment Embedding:</strong> Distinguish sentence A from sentence B</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>[CLS] Token:</strong> Aggregated representation for classification tasks</div>
	</div>

	<h3>Model Sizes</h3>
	<table>
	<tr><th>Model</th><th>Layers</th><th>Hidden</th><th>Params</th></tr>
	<tr><td>BERT-base</td><td>12</td><td>768</td><td>110M</td></tr>
	<tr><td>BERT-large</td><td>24</td><td>1024</td><td>340M</td></tr>
	</table>
	`,
	math: `
	<h3>Masked Language Modeling (MLM)</h3>
	<p>BERT's main pre-training objective:</p>

	<div class="formula">
	L_MLM = -Σ log P(x_masked \| x_visible)<br>
	<br>
	For each masked token, predict using cross-entropy loss
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Masking Strategy</div>
	Of the 15% tokens selected for masking:<br>
	• 80% → [MASK] token<br>
	• 10% → Random token<br>
	• 10% → Keep original<br>
	This prevents over-reliance on [MASK] during fine-tuning!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🔍 Search & QA</div>
	<div class="box-content">
	<strong>Google Search:</strong> Uses BERT for understanding queries<br>
	Question answering systems, document retrieval
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">📊 Text Classification</div>
	<div class="box-content">Sentiment analysis, topic classification, spam detection</div>
	</div>
	`
	},
	"gpt": {
	overview: `
	<h3>GPT (Generative Pre-trained Transformer)</h3>
	<p>Decoder-only Transformer trained to predict next token (autoregressive language modeling).</p>

	<h3>GPT Evolution</h3>
	<table>
	<tr>
	<th>Model</th>
	<th>Params</th>
	<th>Training Data</th>
	<th>Capability</th>
	</tr>
	<tr>
	<td>GPT-1</td>
	<td>117M</td>
	<td>BooksCorpus</td>
	<td>Basic text generation</td>
	</tr>
	<tr>
	<td>GPT-2</td>
	<td>1.5B</td>
	<td>WebText (40GB)</td>
	<td>Coherent paragraphs</td>
	</tr>
	<tr>
	<td>GPT-3</td>
	<td>175B</td>
	<td>570GB text</td>
	<td>Few-shot learning</td>
	</tr>
	<tr>
	<td>GPT-4</td>
	<td>~1.8T</td>
	<td>Multi-modal</td>
	<td>Reasoning, coding, images</td>
	</tr>
	</table>

	<div class="callout insight">
	<div class="callout-title">🚀 Emergent Abilities</div>
	As models scale, new capabilities emerge:<br>
	• In-context learning (learn from prompts)<br>
	• Chain-of-thought reasoning<br>
	• Code generation<br>
	• Multi-step problem solving
	</div>
	`,
	concepts: `
	<h3>GPT Architecture</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Decoder Only:</strong> Uses causal (masked) attention - can only see past tokens</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Autoregressive:</strong> Generate one token at a time, feed back as input</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Pre-training:</strong> Next token prediction on massive text corpus</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>RLHF:</strong> Reinforcement Learning from Human Feedback (ChatGPT)</div>
	</div>

	<h3>In-Context Learning</h3>
	<p>GPT-3+ can learn from examples in the prompt without updating weights!</p>
	<div class="formula">
	Zero-shot: "Translate to French: Hello" → "Bonjour"<br>
	Few-shot: "cat→chat, dog→chien, house→?" → "maison"
	</div>
	`,
	math: `
	<h3>Causal Language Modeling</h3>
	<p>GPT is trained to maximize the likelihood of the next token:</p>

	<div class="formula">
	L = -Σ log P(x_t \| x_{<t})<br>
	<br>
	Where P(x_t \| x_{<t}) = softmax(h_t × W_vocab)
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Scaling Laws</div>
	Performance scales predictably with compute, data, and parameters:<br>
	L ∝ N^(-0.076) for model size N<br>
	This is why OpenAI trained GPT-3 (175B) and GPT-4 (1.8T)!
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">💬 ChatGPT & Assistants</div>
	<div class="box-content">
	Conversational AI, customer support, tutoring, brainstorming
	</div>
	</div>
	<div class="info-box">
	<div class="box-title">💻 Code Generation</div>
	<div class="box-content">
	GitHub Copilot, code completion, bug fixing, documentation
	</div>
	</div>
	`
	},
	"vit": {
	overview: `
	<h3>Vision Transformer (ViT)</h3>
	<p>Apply Transformer architecture directly to images by treating them as sequences of patches.</p>

	<h3>How ViT Works</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Patchify:</strong> Split 224×224 image into 16×16 patches (14×14 = 196 patches)</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Linear Projection:</strong> Flatten each patch → linear embedding (like word embeddings)</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Positional Encoding:</strong> Add position information</div>
	</div>
	<div class="list-item">
	<div class="list-num">04</div>
	<div><strong>Transformer Encoder:</strong> Standard Transformer (self-attention, FFN)</div>
	</div>
	<div class="list-item">
	<div class="list-num">05</div>
	<div><strong>Classification:</strong> Use [CLS] token for final prediction</div>
	</div>

	<div class="callout tip">
	<div class="callout-title">💡 When ViT Shines</div>
	• <strong>Large Datasets:</strong> Needs 10M+ images (or pre-training on ImageNet-21K)<br>
	• <strong>Transfer Learning:</strong> Pre-trained ViT beats CNNs on many tasks<br>
	• <strong>Long-Range Dependencies:</strong> Global attention vs CNN's local receptive field
	</div>
	`,
	concepts: `
	<h3>ViT vs CNN Comparison</h3>
	<table>
	<tr><th>Aspect</th><th>CNN</th><th>ViT</th></tr>
	<tr><td>Inductive Bias</td><td>Locality, translation invariance</td><td>Minimal - learns from data</td></tr>
	<tr><td>Data Efficiency</td><td>Better with small datasets</td><td>Needs large datasets</td></tr>
	<tr><td>Receptive Field</td><td>Local (grows with depth)</td><td>Global from layer 1</td></tr>
	<tr><td>Scalability</td><td>Diminishing returns</td><td>Scales well with compute</td></tr>
	</table>

	<h3>Key Innovations</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>No Convolutions:</strong> Pure attention - "An Image is Worth 16x16 Words"</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Learnable Position:</strong> Position embeddings are learned, not sinusoidal</div>
	</div>
	`,
	math: `
	<h3>Patch Embedding</h3>
	<p>Convert image patches to token embeddings:</p>

	<div class="formula">
	z_0 = [x_cls; x_p^1 E; x_p^2 E; ...; x_p^N E] + E_pos<br>
	<br>
	Where:<br>
	• x_p^i = flattened patch (16×16×3 = 768 dimensions)<br>
	• E = learnable linear projection<br>
	• E_pos = position embedding
	</div>

	<div class="callout insight">
	<div class="callout-title">📝 Paper & Pain: Computation</div>
	ViT-Base: 12 layers, 768 hidden, 12 heads ~ 86M params<br>
	Self-attention cost: O(n²·d) where n=196 patches<br>
	This is why ViT is efficient for images (196 tokens) vs text (1000+ tokens)
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">🖼️ Image Classification</div>
	<div class="box-content">SOTA on ImageNet with pre-training. Google/DeepMind use for internal systems.</div>
	</div>
	<div class="info-box">
	<div class="box-title">🔍 Object Detection</div>
	<div class="box-content">DETR, DINO - Transformer-based detection replacing Faster R-CNN</div>
	</div>
	<div class="info-box">
	<div class="box-title">🎬 Video Understanding</div>
	<div class="box-content">VideoViT, TimeSformer - extend patches to 3D (space + time)</div>
	</div>
	`
	},
	"gnn": {
	overview: `
	<h3>Graph Neural Networks (GNNs)</h3>
	<p>Deep learning on non-Euclidean data structures like social networks, molecules, and knowledge graphs.</p>

	<h3>Key Concepts</h3>
	<div class="list-item">
	<div class="list-num">01</div>
	<div><strong>Graph Structure:</strong> Nodes (entities) and Edges (relationships).</div>
	</div>
	<div class="list-item">
	<div class="list-num">02</div>
	<div><strong>Message Passing:</strong> Nodes exchange information with neighbors.</div>
	</div>
	<div class="list-item">
	<div class="list-num">03</div>
	<div><strong>Aggregation:</strong> Combine incoming messages (Sum, Mean, Max).</div>
	</div>

	<div class="callout tip">
	<div class="callout-title">💡 Why GNNs?</div>
	Standard CNNs expect a fixed grid (euclidean). Graphs have arbitrary size and topology. GNNs are permutation invariant!
	</div>
	`,
	concepts: `
	<h3>Message Passing Neural Networks (MPNN)</h3>
	<p>The core framework for most GNNs.</p>

	<div class="list-item">
	<div class="list-num">1</div>
	<div><strong>Message Function:</strong> Compute message from neighbor to node.</div>
	</div>
	<div class="list-item">
	<div class="list-num">2</div>
	<div><strong>Aggregation Function:</strong> Sum all messages from neighbors.</div>
	</div>
	<div class="list-item">
	<div class="list-num">3</div>
	<div><strong>Update Function:</strong> Update node state based on aggregated messages.</div>
	</div>
	`,
	math: `
	<h3>Graph Convolution Network (GCN)</h3>
	<p>The "Hello World" of GNNs (Kipf & Welling, 2017).</p>

	<div class="formula">
	H^{(l+1)} = σ(D^{-1/2} A D^{-1/2} H^{(l)} W^{(l)})
	</div>

	<p>Where:</p>
	<ul>
	<li><strong>A:</strong> Adjacency Matrix (connections)</li>
	<li><strong>D:</strong> Degree Matrix (number of connections)</li>
	<li><strong>H:</strong> Node Features</li>
	<li><strong>W:</strong> Learnable Weights</li>
	</ul>

	<div class="callout warning">
	<div class="callout-title">⚠️ Over-smoothing</div>
	If GNN is too deep, all node representations become indistinguishable. Usually 2-4 layers are enough.
	</div>
	`,
	applications: `
	<div class="info-box">
	<div class="box-title">💊 Drug Discovery</div>
	<div class="box-content">Predicting molecular properties, protein folding (AlphaFold)</div>
	</div>
	<div class="info-box">
	<div class="box-title">🚗 Traffic Prediction</div>
	<div class="box-content">Road networks, estimating travel times (Google Maps)</div>
	</div>
	<div class="info-box">
	<div class="box-title">🛒 Recommender Systems</div>
	<div class="box-content">Pinterest (PinSage), User-Item graphs</div>
	</div>
	`
	}
	};

	function createModuleHTML(module) {
	const content = MODULE_CONTENT[module.id] \|\| {};

	return `
	<div class="module" id="${module.id}-module">
	<button class="btn-back" onclick="switchTo('dashboard')">← Back to Dashboard</button>
	<header>
	<h1>${module.icon} ${module.title}</h1>
	<p class="subtitle">${module.description}</p>
	</header>

	<div class="tabs">
	<button class="tab-btn active" onclick="switchTab(event, '${module.id}-overview')">Overview</button>
	<button class="tab-btn" onclick="switchTab(event, '${module.id}-concepts')">Key Concepts</button>
	<button class="tab-btn" onclick="switchTab(event, '${module.id}-visualization')">📊 Visualization</button>
	<button class="tab-btn" onclick="switchTab(event, '${module.id}-math')">Math</button>
	<button class="tab-btn" onclick="switchTab(event, '${module.id}-applications')">Applications</button>
	<button class="tab-btn" onclick="switchTab(event, '${module.id}-summary')">Summary</button>
	</div>

	<div id="${module.id}-overview" class="tab active">
	<div class="section">
	<h2>📖 Overview</h2>
	${content.overview \|\| `
	<p>Complete coverage of ${module.title.toLowerCase()}. Learn the fundamentals, mathematics, real-world applications, and implementation details.</p>
	<div class="info-box">
	<div class="box-title">Learning Objectives</div>
	<div class="box-content">
	✓ Understand core concepts and theory<br>
	✓ Master mathematical foundations<br>
	✓ Learn practical applications<br>
	✓ Implement and experiment
	</div>
	</div>
	`}
	</div>
	</div>

	<div id="${module.id}-concepts" class="tab">
	<div class="section">
	<h2>🎯 Key Concepts</h2>
	${content.concepts \|\| `
	<p>Fundamental concepts and building blocks for ${module.title.toLowerCase()}.</p>
	<div class="callout insight">
	<div class="callout-title">💡 Main Ideas</div>
	This section covers the core ideas you need to understand before diving into mathematics.
	</div>
	`}
	</div>
	</div>

	<div id="${module.id}-visualization" class="tab">
	<div class="section">
	<h2>📊 Interactive Visualization</h2>
	<p>Visual representation to help understand ${module.title.toLowerCase()} concepts intuitively.</p>
	<div id="${module.id}-viz" class="viz-container">
	<canvas id="${module.id}-canvas" width="800" height="400" style="border: 1px solid rgba(0, 212, 255, 0.3); border-radius: 8px; background: rgba(0, 212, 255, 0.02);"></canvas>
	</div>
	<div class="viz-controls">
	<button onclick="drawVisualization('${module.id}')" class="btn-viz">🔄 Refresh Visualization</button>
	<button onclick="toggleVizAnimation('${module.id}')" class="btn-viz">▶️ Animate</button>
	<button onclick="downloadViz('${module.id}')" class="btn-viz">⬇️ Save Image</button>
	</div>
	</div>
	</div>

	<div id="${module.id}-math" class="tab">
	<div class="section">
	<h2>📐 Mathematical Foundation</h2>
	${content.math \|\| `
	<p>Rigorous mathematical treatment of ${module.title.toLowerCase()}.</p>
	<div class="formula">
	Mathematical formulas and derivations go here
	</div>
	`}
	</div>
	</div>

	<div id="${module.id}-applications" class="tab">
	<div class="section">
	<h2>🌍 Real-World Applications</h2>
	${content.applications \|\| `
	<p>How ${module.title.toLowerCase()} is used in practice across different industries.</p>
	<div class="info-box">
	<div class="box-title">Use Cases</div>
	<div class="box-content">
	Common applications and practical examples
	</div>
	</div>
	`}
	</div>
	</div>

	<div id="${module.id}-summary" class="tab">
	<div class="section">
	<h2>✅ Summary</h2>
	<div class="info-box">
	<div class="box-title">Key Takeaways</div>
	<div class="box-content">
	✓ Essential concepts covered<br>
	✓ Mathematical foundations understood<br>
	✓ Real-world applications identified<br>
	✓ Ready for implementation
	</div>
	</div>
	</div>
	</div>
	</div>
	`;
	}

	function initDashboard() {
	const grid = document.getElementById("modulesGrid");
	const container = document.getElementById("modulesContainer");

	modules.forEach(module => {
	const card = document.createElement("div");
	card.className = "card";
	card.style.borderColor = module.color;
	card.onclick = () => switchTo(module.id + "-module");
	card.innerHTML = `
	<div class="card-icon">${module.icon}</div>
	<h3>${module.title}</h3>
	<p>${module.description}</p>
	<span class="category-label">${module.category}</span>
	`;
	grid.appendChild(card);

	const moduleHTML = createModuleHTML(module);
	container.innerHTML += moduleHTML;
	});
	}

	function switchTo(target) {
	document.querySelectorAll('.dashboard, .module').forEach(el => {
	el.classList.remove('active');
	});
	const elem = document.getElementById(target);
	if (elem) elem.classList.add('active');
	}

	function switchTab(e, tabId) {
	const module = e.target.closest('.module');
	if (!module) return;

	module.querySelectorAll('.tab').forEach(t => t.classList.remove('active'));
	module.querySelectorAll('.tab-btn').forEach(b => b.classList.remove('active'));

	const tab = document.getElementById(tabId);
	if (tab) tab.classList.add('active');
	e.target.classList.add('active');

	// Trigger visualization when tabs are clicked
	setTimeout(() => {
	const moduleId = tabId.split('-')[0];
	if (tabId.includes('-concepts')) {
	drawConceptsVisualization(moduleId);
	} else if (tabId.includes('-visualization')) {
	drawConceptsVisualization(moduleId);
	} else if (tabId.includes('-math')) {
	drawMathVisualization(moduleId);
	} else if (tabId.includes('-applications')) {
	drawApplicationVisualization(moduleId);
	}
	}, 150);
	}

	// Visualization Functions - Concepts Tab
	function drawConceptsVisualization(moduleId) {
	const canvas = document.getElementById(moduleId + '-canvas');
	if (!canvas) return;

	const ctx = canvas.getContext('2d');
	ctx.clearRect(0, 0, canvas.width, canvas.height);
	ctx.fillStyle = '#0f1419';
	ctx.fillRect(0, 0, canvas.width, canvas.height);

	const vizMap = {
	'nn-basics': drawNeuronAnimation,
	'perceptron': drawDecisionBoundary,
	'mlp': drawNetworkGraph,
	'activation': drawActivationFunctions,
	'weight-init': drawWeightDistribution,
	'loss': drawLossLandscape,
	'optimizers': drawConvergencePaths,
	'backprop': drawGradientFlow,
	'regularization': drawOverfitComparison,
	'batch-norm': drawBatchNormalization,
	'cv-intro': drawImageMatrix,
	'conv-layer': drawConvolutionAnimation,
	'pooling': drawPoolingDemo,
	'cnn-basics': drawCNNArchitecture,
	'viz-filters': drawLearnedFilters,
	'lenet': drawLeNetArchitecture,
	'alexnet': drawAlexNetArchitecture,
	'vgg': drawVGGArchitecture,
	'resnet': drawResNetArchitecture,
	'inception': drawInceptionModule,
	'mobilenet': drawMobileNetArchitecture,
	'transfer-learning': drawTransferLearning,
	'localization': drawBoundingBoxes,
	'rcnn': drawRCNNPipeline,
	'yolo': drawYOLOGrid,
	'ssd': drawSSDDetector,
	'semantic-seg': drawSemanticSegmentation,
	'instance-seg': drawInstanceSegmentation,
	'face-recog': drawFaceEmbeddings,
	'autoencoders': drawAutoencoderArchitecture,
	'gans': drawGANsGame,
	'diffusion': drawDiffusionProcess,
	'rnn': drawRNNUnrolled,
	'transformers': drawAttentionMatrix,
	'bert': drawBERTProcess,
	'gpt': drawGPTGeneration,
	'vit': drawVisionTransformer,
	'gnn': drawGraphNetwork
	};

	if (vizMap[moduleId]) {
	vizMap[moduleId](ctx, canvas);
	} else {
	drawDefaultVisualization(ctx, canvas);
	}
	}

	// Default Visualization
	function drawDefaultVisualization(ctx, canvas) {
	const centerX = canvas.width / 2;
	const centerY = canvas.height / 2;

	ctx.fillStyle = 'rgba(0, 212, 255, 0.2)';
	ctx.fillRect(centerX - 120, centerY - 60, 240, 120);
	ctx.strokeStyle = '#00d4ff';
	ctx.lineWidth = 2;
	ctx.strokeRect(centerX - 120, centerY - 60, 240, 120);

	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 18px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('📊 Interactive Visualization', centerX, centerY - 20);
	ctx.font = '13px Arial';
	ctx.fillText('Custom visualization for this topic', centerX, centerY + 20);
	ctx.font = '11px Arial';
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Click Refresh to render', centerX, centerY + 45);
	}

	// Default Math Visualization
	function drawDefaultMathVisualization(ctx, canvas) {
	const centerX = canvas.width / 2;
	const centerY = canvas.height / 2;

	ctx.fillStyle = 'rgba(255, 107, 53, 0.2)';
	ctx.fillRect(centerX - 120, centerY - 60, 240, 120);
	ctx.strokeStyle = '#ff6b35';
	ctx.lineWidth = 2;
	ctx.strokeRect(centerX - 120, centerY - 60, 240, 120);

	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 18px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('📐 Mathematical Formulas', centerX, centerY - 20);
	ctx.font = '13px Arial';
	ctx.fillText('Visual equation derivations', centerX, centerY + 20);
	ctx.font = '11px Arial';
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Click Visualize to render', centerX, centerY + 45);
	}

	// Default Application Visualization
	function drawDefaultApplicationVisualization(ctx, canvas) {
	const centerX = canvas.width / 2;
	const centerY = canvas.height / 2;

	ctx.fillStyle = 'rgba(0, 255, 136, 0.2)';
	ctx.fillRect(centerX - 120, centerY - 60, 240, 120);
	ctx.strokeStyle = '#00ff88';
	ctx.lineWidth = 2;
	ctx.strokeRect(centerX - 120, centerY - 60, 240, 120);

	ctx.fillStyle = '#00ff88';
	ctx.font = 'bold 18px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('🌍 Real-World Applications', centerX, centerY - 20);
	ctx.font = '13px Arial';
	ctx.fillText('Practical use cases and examples', centerX, centerY + 20);
	ctx.font = '11px Arial';
	ctx.fillStyle = '#ffa500';
	ctx.fillText('Click Show Applications to render', centerX, centerY + 45);
	}

	// Activation Functions Visualization
	function drawActivationFunctions(ctx, canvas) {
	const width = canvas.width;
	const height = canvas.height;
	const centerX = width / 2;
	const centerY = height / 2;
	const scale = 40;

	// Draw grid
	ctx.strokeStyle = 'rgba(0, 212, 255, 0.1)';
	ctx.lineWidth = 1;
	for (let i = -5; i <= 5; i += 1) {
	const x = centerX + i * scale;
	ctx.beginPath();
	ctx.moveTo(x, centerY - 5 * scale);
	ctx.lineTo(x, centerY + 5 * scale);
	ctx.stroke();
	}

	// Draw axes
	ctx.strokeStyle = '#00d4ff';
	ctx.lineWidth = 2;
	ctx.beginPath();
	ctx.moveTo(centerX - 6 * scale, centerY);
	ctx.lineTo(centerX + 6 * scale, centerY);
	ctx.stroke();
	ctx.beginPath();
	ctx.moveTo(centerX, centerY - 6 * scale);
	ctx.lineTo(centerX, centerY + 6 * scale);
	ctx.stroke();

	// Draw activation functions
	const functions = [
	{ name: 'ReLU', color: '#ff6b35', fn: x => Math.max(0, x) },
	{ name: 'Sigmoid', color: '#00ff88', fn: x => 1 / (1 + Math.exp(-x)) },
	{ name: 'Tanh', color: '#ffa500', fn: x => Math.tanh(x) }
	];

	functions.forEach(func => {
	ctx.strokeStyle = func.color;
	ctx.lineWidth = 2;
	ctx.beginPath();
	for (let x = -5; x <= 5; x += 0.1) {
	const y = func.fn(x);
	const canvasX = centerX + x * scale;
	const canvasY = centerY - y * scale;
	if (x === -5) ctx.moveTo(canvasX, canvasY);
	else ctx.lineTo(canvasX, canvasY);
	}
	ctx.stroke();
	});

	// Legend
	ctx.font = 'bold 12px Arial';
	functions.forEach((func, i) => {
	ctx.fillStyle = func.color;
	ctx.fillRect(10, 10 + i * 20, 10, 10);
	ctx.fillStyle = '#e4e6eb';
	ctx.fillText(func.name, 25, 19 + i * 20);
	});
	}

	// Neural Network Graph
	function drawNetworkGraph(ctx, canvas) {
	const layers = [2, 3, 3, 1];
	const width = canvas.width;
	const height = canvas.height;
	const layerWidth = width / (layers.length + 1);

	ctx.fillStyle = 'rgba(0, 212, 255, 0.05)';
	ctx.fillRect(0, 0, width, height);

	// Draw neurons and connections
	const neuronPositions = [];

	layers.forEach((numNeurons, layerIdx) => {
	const x = (layerIdx + 1) * layerWidth;
	const positions = [];

	for (let i = 0; i < numNeurons; i++) {
	const y = height / (numNeurons + 1) * (i + 1);
	positions.push({ x, y });

	// Draw connections to next layer
	if (layerIdx < layers.length - 1) {
	const nextLayerPositions = [];
	const nextX = (layerIdx + 2) * layerWidth;
	for (let j = 0; j < layers[layerIdx + 1]; j++) {
	const nextY = height / (layers[layerIdx + 1] + 1) * (j + 1);
	nextLayerPositions.push({ x: nextX, y: nextY });
	}

	nextLayerPositions.forEach(next => {
	ctx.strokeStyle = 'rgba(0, 212, 255, 0.2)';
	ctx.lineWidth = 1;
	ctx.beginPath();
	ctx.moveTo(x, y);
	ctx.lineTo(next.x, next.y);
	ctx.stroke();
	});
	}
	}

	// Draw neurons
	positions.forEach(pos => {
	ctx.fillStyle = '#00d4ff';
	ctx.beginPath();
	ctx.arc(pos.x, pos.y, 8, 0, Math.PI * 2);
	ctx.fill();
	});

	neuronPositions.push(positions);
	});

	// Labels
	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 12px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Input', layerWidth, height - 10);
	ctx.fillText('Hidden 1', layerWidth * 2, height - 10);
	ctx.fillText('Hidden 2', layerWidth * 3, height - 10);
	ctx.fillText('Output', layerWidth * 4, height - 10);
	}

	// Convolution Animation
	function drawConvolutionAnimation(ctx, canvas) {
	const width = canvas.width;
	const height = canvas.height;

	// Draw input image
	ctx.fillStyle = 'rgba(0, 212, 255, 0.1)';
	ctx.fillRect(20, 20, 150, 150);
	ctx.strokeStyle = '#00d4ff';
	ctx.lineWidth = 2;
	ctx.strokeRect(20, 20, 150, 150);

	// Draw filter
	ctx.fillStyle = 'rgba(255, 107, 53, 0.1)';
	const filterPos = 60 + Math.sin(Date.now() / 1000) * 40;
	ctx.fillRect(filterPos, 60, 60, 60);
	ctx.strokeStyle = '#ff6b35';
	ctx.lineWidth = 3;
	ctx.strokeRect(filterPos, 60, 60, 60);

	// Draw output
	ctx.fillStyle = 'rgba(0, 255, 136, 0.1)';
	ctx.fillRect(width - 170, 20, 150, 150);
	ctx.strokeStyle = '#00ff88';
	ctx.lineWidth = 2;
	ctx.strokeRect(width - 170, 20, 150, 150);

	// Draw feature map
	for (let i = 0; i < 5; i++) {
	for (let j = 0; j < 5; j++) {
	const intensity = Math.random() * 100;
	ctx.fillStyle = `rgba(0, 212, 255, ${intensity / 100})`;
	ctx.fillRect(width - 160 + i * 25, 30 + j * 25, 20, 20);
	}
	}

	// Labels
	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 12px Arial';
	ctx.textAlign = 'left';
	ctx.fillText('Input Image', 20, 190);
	ctx.fillText('Filter', filterPos, 140);
	ctx.fillText('Feature Map', width - 170, 190);
	}

	// Loss Landscape
	function drawLossLandscape(ctx, canvas) {
	const width = canvas.width;
	const height = canvas.height;

	for (let x = 0; x < width; x += 20) {
	for (let y = 0; y < height; y += 20) {
	const nx = (x - width / 2) / (width / 4);
	const ny = (y - height / 2) / (height / 4);
	const loss = nx * nx + ny * ny;
	const intensity = Math.min(255, loss * 50);
	ctx.fillStyle = `rgb(${intensity}, ${100}, ${255 - intensity})`;
	ctx.fillRect(x, y, 20, 20);
	}
	}

	// Draw descent path
	ctx.strokeStyle = '#00ff88';
	ctx.lineWidth = 2;
	ctx.beginPath();
	const startX = width / 2 + 80;
	const startY = height / 2 + 80;
	ctx.moveTo(startX, startY);

	for (let i = 0; i < 20; i++) {
	const angle = Math.atan2(startY - height / 2, startX - width / 2);
	const newX = startX - Math.cos(angle) * 15;
	const newY = startY - Math.sin(angle) * 15;
	ctx.lineTo(newX, newY);
	}
	ctx.stroke();

	// Minimum point
	ctx.fillStyle = '#00ff88';
	ctx.beginPath();
	ctx.arc(width / 2, height / 2, 8, 0, Math.PI * 2);
	ctx.fill();
	}

	// YOLO Grid
	function drawYOLOGrid(ctx, canvas) {
	const width = canvas.width;
	const height = canvas.height;
	const gridSize = 7;
	const cellWidth = width / gridSize;
	const cellHeight = height / gridSize;

	// Draw grid
	ctx.strokeStyle = 'rgba(0, 212, 255, 0.3)';
	ctx.lineWidth = 1;
	for (let i = 0; i <= gridSize; i++) {
	ctx.beginPath();
	ctx.moveTo(i * cellWidth, 0);
	ctx.lineTo(i * cellWidth, height);
	ctx.stroke();

	ctx.beginPath();
	ctx.moveTo(0, i * cellHeight);
	ctx.lineTo(width, i * cellHeight);
	ctx.stroke();
	}

	// Draw detected objects
	const detections = [
	{ x: 2, y: 2, w: 2, h: 2, conf: 0.95 },
	{ x: 4, y: 5, w: 1.5, h: 1.5, conf: 0.87 }
	];

	detections.forEach(det => {
	ctx.fillStyle = `rgba(255, 107, 53, ${det.conf * 0.5})`;
	ctx.fillRect(det.x * cellWidth, det.y * cellHeight, det.w * cellWidth, det.h * cellHeight);
	ctx.strokeStyle = '#ff6b35';
	ctx.lineWidth = 2;
	ctx.strokeRect(det.x * cellWidth, det.y * cellHeight, det.w * cellWidth, det.h * cellHeight);

	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 12px Arial';
	ctx.fillText((det.conf * 100).toFixed(0) + '%', det.x * cellWidth + 5, det.y * cellHeight + 15);
	});
	}

	// Attention Matrix
	function drawAttentionMatrix(ctx, canvas) {
	const size = 8;
	const cellSize = Math.min(canvas.width, canvas.height) / size;

	for (let i = 0; i < size; i++) {
	for (let j = 0; j < size; j++) {
	const distance = Math.abs(i - j);
	const attention = Math.exp(-distance / 2);
	const intensity = Math.floor(attention * 255);
	ctx.fillStyle = `rgb(${intensity}, 100, ${200 - intensity})`;
	ctx.fillRect(i * cellSize, j * cellSize, cellSize, cellSize);
	}
	}

	// Add labels
	ctx.fillStyle = '#e4e6eb';
	ctx.font = '10px Arial';
	ctx.textAlign = 'center';
	for (let i = 0; i < size; i++) {
	ctx.fillText('w' + i, i * cellSize + cellSize / 2, canvas.height - 5);
	}
	}

	// Math Visualization
	function drawMathVisualization(moduleId) {
	const canvas = document.getElementById(moduleId + '-math-canvas');
	if (!canvas) return;

	const ctx = canvas.getContext('2d');
	ctx.clearRect(0, 0, canvas.width, canvas.height);
	ctx.fillStyle = '#0f1419';
	ctx.fillRect(0, 0, canvas.width, canvas.height);

	const mathVizMap = {
	'nn-basics': () => drawNNMath(ctx, canvas),
	'activation': () => drawActivationDerivatives(ctx, canvas),
	'loss': () => drawLossComparison(ctx, canvas),
	'optimizers': () => drawOptimizerSteps(ctx, canvas),
	'backprop': () => drawChainRule(ctx, canvas),
	'conv-layer': () => drawConvolutionMath(ctx, canvas),
	'pooling': () => drawPoolingMath(ctx, canvas),
	'regularization': () => drawRegularizationMath(ctx, canvas),
	'transformers': () => drawAttentionMath(ctx, canvas),
	'rnn': () => drawRNNMath(ctx, canvas),
	'gnn': () => drawGNNMath(ctx, canvas)
	};

	if (mathVizMap[moduleId]) {
	mathVizMap[moduleId]();
	} else {
	drawDefaultMathVisualization(ctx, canvas);
	}
	}

	// Application Visualization
	function drawApplicationVisualization(moduleId) {
	const canvas = document.getElementById(moduleId + '-app-canvas');
	if (!canvas) return;

	const ctx = canvas.getContext('2d');
	ctx.clearRect(0, 0, canvas.width, canvas.height);
	ctx.fillStyle = '#0f1419';
	ctx.fillRect(0, 0, canvas.width, canvas.height);

	const appVizMap = {
	'nn-basics': () => drawNNApplications(ctx, canvas),
	'cnn-basics': () => drawCNNApplications(ctx, canvas),
	'conv-layer': () => drawConvolutionApplications(ctx, canvas),
	'yolo': () => drawYOLOApplications(ctx, canvas),
	'semantic-seg': () => drawSegmentationApplications(ctx, canvas),
	'instance-seg': () => drawInstanceSegmentationApps(ctx, canvas),
	'face-recog': () => drawFaceRecognitionApps(ctx, canvas),
	'transformers': () => drawTransformerApps(ctx, canvas),
	'bert': () => drawBERTApplications(ctx, canvas),
	'gpt': () => drawGPTApplications(ctx, canvas),
	'gans': () => drawGANApplications(ctx, canvas),
	'diffusion': () => drawDiffusionApplications(ctx, canvas),
	'gnn': () => drawGNNApplications(ctx, canvas)
	};

	if (appVizMap[moduleId]) {
	appVizMap[moduleId]();
	} else {
	drawDefaultApplicationVisualization(ctx, canvas);
	}
	}

	// Math visualization helper functions
	function drawNNMath(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 18px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Forward Pass: y = σ(Wx + b)', canvas.width / 2, 50);
	ctx.font = '14px Arial';
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Linear combination + Non-linearity', canvas.width / 2, 100);
	ctx.fillStyle = '#ffa500';
	ctx.fillText('W: weights, b: bias, σ: activation', canvas.width / 2, 150);
	}

	function drawActivationDerivatives(ctx, canvas) {
	const width = canvas.width;
	const height = canvas.height;
	const centerX = width / 2;
	const centerY = height / 2;
	const scale = 40;

	ctx.strokeStyle = 'rgba(0, 212, 255, 0.2)';
	ctx.lineWidth = 1;
	for (let i = -5; i <= 5; i += 1) {
	ctx.beginPath();
	ctx.moveTo(centerX + i * scale, centerY - 5 * scale);
	ctx.lineTo(centerX + i * scale, centerY + 5 * scale);
	ctx.stroke();
	}

	ctx.strokeStyle = '#00ff88';
	ctx.lineWidth = 3;
	ctx.beginPath();
	for (let x = -5; x <= 5; x += 0.1) {
	const y = 1 / (1 + Math.exp(-x)) * (1 - 1 / (1 + Math.exp(-x)));
	const canvasX = centerX + x * scale;
	const canvasY = centerY - y * scale * 10;
	if (x === -5) ctx.moveTo(canvasX, canvasY);
	else ctx.lineTo(canvasX, canvasY);
	}
	ctx.stroke();

	ctx.fillStyle = '#00ff88';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText("Sigmoid Derivative: σ'(x) = σ(x)(1-σ(x))", canvas.width / 2, 30);
	}

	function drawLossComparison(ctx, canvas) {
	const width = canvas.width;
	const height = canvas.height;

	// MSE
	ctx.fillStyle = 'rgba(0, 212, 255, 0.2)';
	ctx.fillRect(20, 60, width / 2 - 30, height - 100);
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.fillText('MSE Loss', width / 4, 45);
	ctx.font = '12px Arial';
	ctx.fillText('L = (1/n)Σ(y-ŷ)²', width / 4, 90);
	ctx.fillText('Regression', width / 4, 115);

	// Cross-Entropy
	ctx.fillStyle = 'rgba(255, 107, 53, 0.2)';
	ctx.fillRect(width / 2 + 10, 60, width / 2 - 30, height - 100);
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 14px Arial';
	ctx.fillText('Cross-Entropy Loss', width * 3 / 4, 45);
	ctx.font = '12px Arial';
	ctx.fillText('L = -Σ(y·log(ŷ))', width * 3 / 4, 90);
	ctx.fillText('Classification', width * 3 / 4, 115);
	}

	function drawOptimizerSteps(ctx, canvas) {
	const width = canvas.width;
	const height = canvas.height;
	const centerY = height / 2;

	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('SGD', width / 4, 50);
	ctx.font = '12px Arial';
	ctx.fillText('w = w - α·∇L', width / 4, 100);

	ctx.fillStyle = '#00ff88';
	ctx.font = 'bold 16px Arial';
	ctx.fillText('Momentum', width / 2, 50);
	ctx.font = '12px Arial';
	ctx.fillText('v = β·v + (1-β)·∇L', width / 2, 100);

	ctx.fillStyle = '#ffa500';
	ctx.font = 'bold 16px Arial';
	ctx.fillText('Adam', width * 3 / 4, 50);
	ctx.font = '12px Arial';
	ctx.fillText('Adaptive learning rate', width * 3 / 4, 100);
	}

	function drawChainRule(ctx, canvas) {
	const width = canvas.width;
	ctx.fillStyle = '#00ff88';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Backpropagation Chain Rule', width / 2, 50);
	ctx.font = '12px Arial';
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('dL/dW = dL/dŷ · dŷ/da · da/dz · dz/dW', width / 2, 100);
	ctx.fillStyle = '#ffa500';
	ctx.fillText('Compute gradient by multiplying partial derivatives', width / 2, 150);
	}

	function drawConvolutionMath(ctx, canvas) {
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Convolution Operation', canvas.width / 2, 50);
	ctx.font = '12px Arial';
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('y[i,j] = Σ Σ w[m,n] * x[i+m,j+n] + b', canvas.width / 2, 100);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Sliding window element-wise multiplication and summation', canvas.width / 2, 150);
	}

	function drawPoolingMath(ctx, canvas) {
	const width = canvas.width;
	ctx.fillStyle = '#00ff88';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Max Pooling', width / 3, 50);
	ctx.font = '12px Arial';
	ctx.fillText('y = max(neighborhood)', width / 3, 100);

	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.fillText('Average Pooling', width * 2 / 3, 50);
	ctx.font = '12px Arial';
	ctx.fillText('y = avg(neighborhood)', width * 2 / 3, 100);

	ctx.fillStyle = '#ffa500';
	ctx.font = '11px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Reduces spatial dimensions', width / 2, 150);
	}

	function drawRegularizationMath(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('L1 Regularization: L = Loss + λΣ\|w\|', canvas.width / 2, 60);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('L2 Regularization: L = Loss + λΣw²', canvas.width / 2, 110);
	ctx.fillStyle = '#ffa500';
	ctx.fillText('Prevents overfitting by penalizing large weights', canvas.width / 2, 160);
	}

	function drawAttentionMath(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Attention Mechanism', canvas.width / 2, 50);
	ctx.font = '12px Arial';
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Attention(Q,K,V) = softmax(QK^T/√d_k) · V', canvas.width / 2, 100);
	ctx.fillStyle = '#ffa500';
	ctx.fillText('Query-Key matching determines how much to focus on each value', canvas.width / 2, 150);
	}

	function drawRNNMath(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('RNN Hidden State Update', canvas.width / 2, 50);
	ctx.font = '12px Arial';
	ctx.fillStyle = '#00ff88';
	ctx.fillText('h_t = σ(W_h·h_(t-1) + W_x·x_t + b)', canvas.width / 2, 100);
	ctx.fillStyle = '#ffa500';
	ctx.fillText('Processes sequences step-by-step with recurrent connections', canvas.width / 2, 150);
	}

	// Application visualization helper functions
	function drawNNApplications(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('📱 Stock Price Prediction', canvas.width / 4, 60);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('🏥 Medical Diagnosis', canvas.width / 2, 60);
	ctx.fillStyle = '#ffa500';
	ctx.fillText('🎮 Game AI', canvas.width * 3 / 4, 60);

	ctx.fillStyle = '#ff6b35';
	ctx.font = '12px Arial';
	ctx.fillText('Fraud Detection', canvas.width / 4, 120);
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('Recommendation Systems', canvas.width / 2, 120);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Credit Scoring', canvas.width * 3 / 4, 120);
	}

	function drawCNNApplications(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Image Classification', canvas.width / 3, 60);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Object Detection', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#ffa500';
	ctx.font = '12px Arial';
	ctx.fillText('Deep Learning Backbone', canvas.width / 2, 150);
	}

	function drawConvolutionApplications(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('📷 Image Feature Extraction', canvas.width / 3, 60);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('🔍 Edge Detection', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#ffa500';
	ctx.font = '12px Arial';
	ctx.fillText('Foundation of Computer Vision', canvas.width / 2, 150);
	}

	function drawYOLOApplications(ctx, canvas) {
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('🚗 Autonomous Driving', canvas.width / 3, 60);
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('📹 Real-time Video Detection', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#00ff88';
	ctx.font = '12px Arial';
	ctx.fillText('Ultra-fast inference for live applications', canvas.width / 2, 150);
	}

	function drawSegmentationApplications(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('🏥 Medical Imaging', canvas.width / 3, 60);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('🚗 Autonomous Vehicles', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#ffa500';
	ctx.font = '12px Arial';
	ctx.fillText('Pixel-level understanding of scenes', canvas.width / 2, 150);
	}

	function drawInstanceSegmentationApps(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('👥 Person Detection & Tracking', canvas.width / 3, 60);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('🍎 Object Instance Counting', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#ffa500';
	ctx.font = '12px Arial';
	ctx.fillText('Separates overlapping objects', canvas.width / 2, 150);
	}

	function drawFaceRecognitionApps(ctx, canvas) {
	ctx.fillStyle = '#ffa500';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('📱 Phone Unlock', canvas.width / 3, 60);
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('🔒 Security Systems', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#00ff88';
	ctx.font = '12px Arial';
	ctx.fillText('Identity verification and access control', canvas.width / 2, 150);
	}

	function drawTransformerApps(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('💬 ChatGPT / LLMs', canvas.width / 3, 60);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('🌐 Machine Translation', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#ffa500';
	ctx.font = '12px Arial';
	ctx.fillText('Foundation of modern NLP and beyond', canvas.width / 2, 150);
	}

	function drawBERTApplications(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('🔍 Semantic Search', canvas.width / 3, 60);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('❓ Question Answering', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#ffa500';
	ctx.font = '12px Arial';
	ctx.fillText('Deep language understanding', canvas.width / 2, 150);
	}

	function drawGPTApplications(ctx, canvas) {
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('✍️ Text Generation', canvas.width / 3, 60);
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('💡 Idea Assistance', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#00ff88';
	ctx.font = '12px Arial';
	ctx.fillText('Powerful autoregressive language models', canvas.width / 2, 150);
	}

	function drawGANApplications(ctx, canvas) {
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('🎨 Image Generation', canvas.width / 3, 60);
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('🎭 Style Transfer', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#00ff88';
	ctx.font = '12px Arial';
	ctx.fillText('Creative content generation and enhancement', canvas.width / 2, 150);
	}

	function drawDiffusionApplications(ctx, canvas) {
	ctx.fillStyle = '#ffa500';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('🖼️ Image Synthesis', canvas.width / 3, 60);
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('🎬 Stable Diffusion', canvas.width * 2 / 3, 60);

	ctx.fillStyle = '#00ff88';
	ctx.font = '12px Arial';
	ctx.fillText('State-of-the-art generative AI', canvas.width / 2, 150);
	}

	// Missing visualization stub functions
	function drawNeuronAnimation(ctx, canvas) {
	drawNetworkGraph(ctx, canvas);
	}

	function drawDecisionBoundary(ctx, canvas) {
	const centerX = canvas.width / 2;
	const centerY = canvas.height / 2;

	// Draw decision boundary line
	ctx.strokeStyle = '#ff6b35';
	ctx.lineWidth = 3;
	ctx.beginPath();
	ctx.moveTo(0, centerY);
	ctx.lineTo(canvas.width, centerY);
	ctx.stroke();

	// Draw sample points
	for (let i = 0; i < 20; i++) {
	const x = Math.random() * canvas.width;
	const y = Math.random() * canvas.height;
	ctx.fillStyle = y < centerY ? '#00d4ff' : '#00ff88';
	ctx.beginPath();
	ctx.arc(x, y, 5, 0, Math.PI * 2);
	ctx.fill();
	}
	}

	function drawWeightDistribution(ctx, canvas) {
	const centerX = canvas.width / 2;
	const centerY = canvas.height / 2;

	// Draw Gaussian distribution
	ctx.strokeStyle = '#00d4ff';
	ctx.lineWidth = 2;
	ctx.beginPath();
	for (let x = -100; x <= 100; x += 2) {
	const y = Math.exp(-(x * x) / 500) * 80;
	const canvasX = centerX + x;
	const canvasY = centerY - y;
	if (x === -100) ctx.moveTo(canvasX, canvasY);
	else ctx.lineTo(canvasX, canvasY);
	}
	ctx.stroke();

	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Weight Distribution (Xavier/He Init)', centerX, 50);
	}

	function drawConvergencePaths(ctx, canvas) {
	drawLossLandscape(ctx, canvas);
	}

	function drawGradientFlow(ctx, canvas) {
	drawChainRule(ctx, canvas);
	}

	function drawOverfitComparison(ctx, canvas) {
	const width = canvas.width;
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Without Regularization', width / 4, 40);
	ctx.fillStyle = '#ff6b35';
	ctx.fillText('With Regularization', width * 3 / 4, 40);

	// Draw wavy overfit line
	ctx.strokeStyle = '#00d4ff';
	ctx.lineWidth = 2;
	ctx.beginPath();
	for (let x = 0; x < width / 2 - 20; x += 5) {
	const y = 100 + Math.sin(x / 10) * 30 + Math.random() * 20;
	if (x === 0) ctx.moveTo(x + 20, y);
	else ctx.lineTo(x + 20, y);
	}
	ctx.stroke();

	// Draw smooth regularized line
	ctx.strokeStyle = '#ff6b35';
	ctx.beginPath();
	for (let x = 0; x < width / 2 - 20; x += 5) {
	const y = 100 + Math.sin(x / 20) * 15;
	if (x === 0) ctx.moveTo(x + width / 2 + 20, y);
	else ctx.lineTo(x + width / 2 + 20, y);
	}
	ctx.stroke();
	}

	function drawBatchNormalization(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Batch Normalization: μ=0, σ²=1', canvas.width / 2, 50);

	// Draw before/after distributions
	ctx.fillStyle = '#ffa500';
	ctx.fillText('Input Distribution', canvas.width / 4, 100);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Normalized Distribution', canvas.width * 3 / 4, 100);
	}

	function drawImageMatrix(ctx, canvas) {
	const cellSize = 20;
	for (let i = 0; i < 10; i++) {
	for (let j = 0; j < 10; j++) {
	const intensity = Math.random();
	ctx.fillStyle = `rgba(0, 212, 255, ${intensity})`;
	ctx.fillRect(i * cellSize + 100, j * cellSize + 100, cellSize, cellSize);
	}
	}
	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Image as Matrix (Pixel Values)', canvas.width / 2, 50);
	}

	function drawPoolingDemo(ctx, canvas) {
	const cellSize = 30;
	const matrix = [[12, 20, 30, 0], [8, 12, 2, 0], [34, 70, 37, 4], [112, 100, 25, 12]];

	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Max Pooling Demo (2x2)', canvas.width / 2, 30);

	// Draw input matrix
	for (let i = 0; i < 4; i++) {
	for (let j = 0; j < 4; j++) {
	ctx.strokeStyle = '#00d4ff';
	ctx.strokeRect(50 + j * cellSize, 50 + i * cellSize, cellSize, cellSize);
	ctx.fillStyle = '#e4e6eb';
	ctx.font = '10px Arial';
	ctx.fillText(matrix[i][j], 50 + j * cellSize + cellSize / 2, 50 + i * cellSize + cellSize / 2 + 4);
	}
	}

	// Draw output (max pooled)
	const pooled = [[20, 30], [112, 37]];
	for (let i = 0; i < 2; i++) {
	for (let j = 0; j < 2; j++) {
	ctx.strokeStyle = '#00ff88';
	ctx.strokeRect(250 + j * cellSize * 1.5, 70 + i * cellSize * 1.5, cellSize * 1.5, cellSize * 1.5);
	ctx.fillStyle = '#00ff88';
	ctx.font = 'bold 12px Arial';
	ctx.fillText(pooled[i][j], 250 + j * cellSize * 1.5 + cellSize * 0.75, 70 + i * cellSize * 1.5 + cellSize * 0.75 + 5);
	}
	}
	}

	function drawCNNArchitecture(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 12px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Input', 60, 200);
	ctx.fillText('Conv', 160, 200);
	ctx.fillText('Pool', 260, 200);
	ctx.fillText('Conv', 360, 200);
	ctx.fillText('Pool', 460, 200);
	ctx.fillText('FC', 560, 200);
	ctx.fillText('Output', 660, 200);

	// Draw blocks
	const blocks = [60, 160, 260, 360, 460, 560, 660];
	blocks.forEach((x, i) => {
	const height = i === 0 ? 100 : (i < blocks.length - 2 ? 80 - i * 10 : 60);
	ctx.strokeStyle = '#00d4ff';
	ctx.strokeRect(x - 30, 100, 60, height);
	});
	}

	function drawLearnedFilters(ctx, canvas) {
	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('CNN Learned Filters', canvas.width / 2, 30);

	const labels = ['Edges', 'Textures', 'Patterns', 'Objects'];
	labels.forEach((label, i) => {
	const x = (i + 1) * canvas.width / 5;
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 12px Arial';
	ctx.fillText(label, x, 80);

	// Draw filter representation
	for (let j = 0; j < 3; j++) {
	for (let k = 0; k < 3; k++) {
	const intensity = Math.random();
	ctx.fillStyle = `rgba(0, 212, 255, ${intensity})`;
	ctx.fillRect(x - 20 + k * 12, 100 + j * 12, 10, 10);
	}
	}
	});
	}

	function drawLeNetArchitecture(ctx, canvas) { drawCNNArchitecture(ctx, canvas); }
	function drawAlexNetArchitecture(ctx, canvas) { drawCNNArchitecture(ctx, canvas); }
	function drawVGGArchitecture(ctx, canvas) { drawCNNArchitecture(ctx, canvas); }
	function drawResNetArchitecture(ctx, canvas) { drawCNNArchitecture(ctx, canvas); }
	function drawInceptionModule(ctx, canvas) { drawCNNArchitecture(ctx, canvas); }
	function drawMobileNetArchitecture(ctx, canvas) { drawCNNArchitecture(ctx, canvas); }
	function drawTransferLearning(ctx, canvas) { drawCNNArchitecture(ctx, canvas); }

	function drawBoundingBoxes(ctx, canvas) {
	// Draw sample image
	ctx.fillStyle = 'rgba(0, 212, 255, 0.1)';
	ctx.fillRect(50, 50, 300, 300);

	// Draw bounding boxes
	ctx.strokeStyle = '#ff6b35';
	ctx.lineWidth = 3;
	ctx.strokeRect(100, 100, 150, 150);
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 12px Arial';
	ctx.fillText('Dog 95%', 105, 95);

	ctx.strokeStyle = '#00ff88';
	ctx.strokeRect(180, 200, 100, 80);
	ctx.fillStyle = '#00ff88';
	ctx.fillText('Cat 87%', 185, 195);
	}

	function drawRCNNPipeline(ctx, canvas) { drawBoundingBoxes(ctx, canvas); }
	function drawSSDDetector(ctx, canvas) { drawBoundingBoxes(ctx, canvas); }

	function drawSemanticSegmentation(ctx, canvas) {
	const cellSize = 15;
	const colors = ['rgba(0, 212, 255, 0.5)', 'rgba(255, 107, 53, 0.5)', 'rgba(0, 255, 136, 0.5)'];

	for (let i = 0; i < 20; i++) {
	for (let j = 0; j < 20; j++) {
	ctx.fillStyle = colors[Math.floor(Math.random() * colors.length)];
	ctx.fillRect(i * cellSize + 100, j * cellSize + 50, cellSize, cellSize);
	}
	}

	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Pixel-wise Classification', canvas.width / 2, 30);
	}

	function drawInstanceSegmentation(ctx, canvas) { drawSemanticSegmentation(ctx, canvas); }

	function drawFaceEmbeddings(ctx, canvas) {
	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Face Embedding Space', canvas.width / 2, 30);

	// Draw embedding vectors
	const faces = 5;
	for (let i = 0; i < faces; i++) {
	const x = 100 + Math.random() * (canvas.width - 200);
	const y = 100 + Math.random() * 200;
	ctx.fillStyle = '#00d4ff';
	ctx.beginPath();
	ctx.arc(x, y, 10, 0, Math.PI * 2);
	ctx.fill();
	}
	}

	function drawAutoencoderArchitecture(ctx, canvas) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 12px Arial';
	ctx.textAlign = 'center';

	const stages = ['Input', 'Encoder', 'Latent', 'Decoder', 'Output'];
	stages.forEach((label, i) => {
	const x = (i + 1) * canvas.width / 6;
	ctx.fillText(label, x, 50);
	const height = i === 2 ? 40 : (i === 0 \|\| i === 4 ? 100 : 70);
	ctx.strokeStyle = '#00d4ff';
	ctx.strokeRect(x - 30, 100, 60, height);
	});
	}

	function drawGANsGame(ctx, canvas) {
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Generator', canvas.width / 3, 50);
	ctx.fillStyle = '#00d4ff';
	ctx.fillText('Discriminator', canvas.width * 2 / 3, 50);

	// DrawGenerator
	ctx.strokeStyle = '#ff6b35';
	ctx.strokeRect(canvas.width / 3 - 50, 100, 100, 100);

	// Draw Discriminator
	ctx.strokeStyle = '#00d4ff';
	ctx.strokeRect(canvas.width * 2 / 3 - 50, 100, 100, 100);

	// Draw arrow
	ctx.strokeStyle = '#00ff88';
	ctx.lineWidth = 2;
	ctx.beginPath();
	ctx.moveTo(canvas.width / 3 + 50, 150);
	ctx.lineTo(canvas.width * 2 / 3 - 50, 150);
	ctx.stroke();
	}

	function drawDiffusionProcess(ctx, canvas) {
	const steps = 5;
	const stepWidth = canvas.width / (steps + 1);

	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Diffusion Process: From Noise to Image', canvas.width / 2, 30);

	for (let i = 0; i < steps; i++) {
	const x = (i + 1) * stepWidth;
	const noise = 1 - (i / steps);
	ctx.fillStyle = `rgba(0, 212, 255, ${1 - noise})`;
	ctx.fillRect(x - 40, 100, 80, 80);
	ctx.strokeStyle = '#00d4ff';
	ctx.strokeRect(x - 40, 100, 80, 80);
	}
	}

	function drawRNNUnrolled(ctx, canvas) {
	const cells = 5;
	const cellWidth = canvas.width / (cells + 1);

	ctx.fillStyle = '#e4e6eb';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Unrolled RNN', canvas.width / 2, 30);

	for (let i = 0; i < cells; i++) {
	const x = (i + 1) * cellWidth;
	ctx.strokeStyle = '#00d4ff';
	ctx.strokeRect(x - 30, 100, 60, 60);

	if (i < cells - 1) {
	ctx.strokeStyle = '#ff6b35';
	ctx.lineWidth = 2;
	ctx.beginPath();
	ctx.moveTo(x + 30, 130);
	ctx.lineTo(x + cellWidth - 30, 130);
	ctx.stroke();
	}
	}
	}

	function drawBERTProcess(ctx, canvas) { drawAttentionMatrix(ctx, canvas); }
	function drawGPTGeneration(ctx, canvas) { drawAttentionMatrix(ctx, canvas); }
	function drawVisionTransformer(ctx, canvas) { drawAttentionMatrix(ctx, canvas); }

	function drawVisualization(moduleId) {
	drawConceptsVisualization(moduleId);
	}

	// Animation and download utilities
	let animationFrameId = null;

	function toggleVizAnimation(moduleId) {
	const btn = event.target;
	window.vizAnimating = !window.vizAnimating;

	if (window.vizAnimating) {
	btn.textContent = '⏹️ Stop';
	btn.style.background = 'linear-gradient(135deg, #ff4444, #cc0000)';
	animateVisualization(moduleId);
	} else {
	btn.textContent = '▶️ Animate';
	btn.style.background = '';
	if (animationFrameId) {
	cancelAnimationFrame(animationFrameId);
	animationFrameId = null;
	}
	}
	}

	function animateVisualization(moduleId) {
	if (!window.vizAnimating) return;

	const canvas = document.getElementById(moduleId + '-canvas');
	if (!canvas) return;

	const ctx = canvas.getContext('2d');
	ctx.clearRect(0, 0, canvas.width, canvas.height);
	ctx.fillStyle = '#0f1419';
	ctx.fillRect(0, 0, canvas.width, canvas.height);

	// Call the appropriate animated drawing function
	const animatedVizMap = {
	'nn-basics': drawAnimatedNetwork,
	'perceptron': drawAnimatedDecisionBoundary,
	'mlp': drawAnimatedMLP,
	'activation': drawAnimatedActivations,
	'conv-layer': drawAnimatedConvolution,
	'gnn': drawAnimatedGNN,
	'transformers': drawAnimatedAttention,
	'backprop': drawAnimatedGradientFlow,
	'gans': drawAnimatedGAN,
	'diffusion': drawAnimatedDiffusion,
	'rnn': drawAnimatedRNN
	};

	if (animatedVizMap[moduleId]) {
	animatedVizMap[moduleId](ctx, canvas, Date.now());
	} else {
	// Default animation - pulsing visualization
	drawDefaultAnimation(ctx, canvas, Date.now());
	}

	animationFrameId = requestAnimationFrame(() => animateVisualization(moduleId));
	}

	// Default animation for modules without specific animations
	function drawDefaultAnimation(ctx, canvas, time) {
	const centerX = canvas.width / 2;
	const centerY = canvas.height / 2;
	const pulse = Math.sin(time / 300) * 0.3 + 0.7;

	// Animated neural network
	const layers = [3, 4, 4, 2];
	const layerWidth = canvas.width / (layers.length + 1);

	layers.forEach((neurons, layerIdx) => {
	const x = (layerIdx + 1) * layerWidth;
	const layerHeight = canvas.height / (neurons + 1);

	for (let i = 0; i < neurons; i++) {
	const y = (i + 1) * layerHeight;
	const radius = 12 + Math.sin(time / 200 + layerIdx + i) * 3;

	// Draw neuron
	ctx.fillStyle = `rgba(0, 212, 255, ${pulse})`;
	ctx.beginPath();
	ctx.arc(x, y, radius, 0, Math.PI * 2);
	ctx.fill();

	// Draw connections to next layer
	if (layerIdx < layers.length - 1) {
	const nextLayerHeight = canvas.height / (layers[layerIdx + 1] + 1);
	const nextX = (layerIdx + 2) * layerWidth;

	for (let j = 0; j < layers[layerIdx + 1]; j++) {
	const nextY = (j + 1) * nextLayerHeight;
	const signalProgress = ((time / 500) + layerIdx * 0.5) % 1;

	ctx.strokeStyle = `rgba(0, 212, 255, ${0.3 + signalProgress * 0.3})`;
	ctx.lineWidth = 1;
	ctx.beginPath();
	ctx.moveTo(x + radius, y);
	ctx.lineTo(nextX - 12, nextY);
	ctx.stroke();

	// Animated signal dot
	const dotX = x + radius + (nextX - 12 - x - radius) * signalProgress;
	const dotY = y + (nextY - y) * signalProgress;
	ctx.fillStyle = '#00ff88';
	ctx.beginPath();
	ctx.arc(dotX, dotY, 3, 0, Math.PI * 2);
	ctx.fill();
	}
	}
	}
	});

	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('🔄 Neural Network Animation', centerX, 25);
	}

	// Animated GNN with message passing
	function drawAnimatedGNN(ctx, canvas, time) {
	ctx.fillStyle = '#9900ff';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Graph Neural Network - Message Passing', canvas.width / 2, 30);

	const nodes = [
	{ x: 100, y: 100 }, { x: 200, y: 60 }, { x: 320, y: 120 },
	{ x: 150, y: 200 }, { x: 400, y: 80 }, { x: 450, y: 180 }
	];
	const edges = [[0, 1], [0, 3], [1, 2], [1, 4], [2, 3], [2, 4], [4, 5]];

	// Draw edges
	ctx.strokeStyle = 'rgba(153, 0, 255, 0.4)';
	ctx.lineWidth = 2;
	edges.forEach(e => {
	ctx.beginPath();
	ctx.moveTo(nodes[e[0]].x, nodes[e[0]].y);
	ctx.lineTo(nodes[e[1]].x, nodes[e[1]].y);
	ctx.stroke();
	});

	// Draw animated message passing
	const messageProgress = (time / 1000) % 1;
	ctx.fillStyle = '#00ff88';
	edges.forEach((e, idx) => {
	const progress = (messageProgress + idx * 0.15) % 1;
	const x = nodes[e[0]].x + (nodes[e[1]].x - nodes[e[0]].x) * progress;
	const y = nodes[e[0]].y + (nodes[e[1]].y - nodes[e[0]].y) * progress;
	ctx.beginPath();
	ctx.arc(x, y, 5, 0, Math.PI * 2);
	ctx.fill();
	});

	// Draw nodes with pulse
	const pulse = Math.sin(time / 300) * 5 + 15;
	nodes.forEach((n, i) => {
	ctx.fillStyle = '#9900ff';
	ctx.beginPath();
	ctx.arc(n.x, n.y, pulse, 0, Math.PI * 2);
	ctx.fill();
	ctx.fillStyle = 'white';
	ctx.font = '12px Arial';
	ctx.textAlign = 'center';
	ctx.fillText(i, n.x, n.y + 4);
	});
	}

	// Animated attention matrix
	function drawAnimatedAttention(ctx, canvas, time) {
	const words = ['The', 'cat', 'sat', 'on', 'mat'];
	const cellSize = 50;
	const startX = (canvas.width - words.length * cellSize) / 2;
	const startY = 80;

	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Self-Attention Animation', canvas.width / 2, 30);

	// Draw words
	ctx.font = '12px Arial';
	words.forEach((word, i) => {
	ctx.fillStyle = '#e4e6eb';
	ctx.fillText(word, startX + i * cellSize + cellSize/2, startY - 10);
	ctx.save();
	ctx.translate(startX - 20, startY + i * cellSize + cellSize/2);
	ctx.fillText(word, 0, 0);
	ctx.restore();
	});

	// Animated attention weights
	for (let i = 0; i < words.length; i++) {
	for (let j = 0; j < words.length; j++) {
	const baseWeight = i === j ? 0.8 : 0.2 + Math.abs(i - j) * 0.1;
	const animatedWeight = baseWeight + Math.sin(time / 500 + i + j) * 0.2;
	const alpha = Math.max(0.1, Math.min(1, animatedWeight));

	ctx.fillStyle = `rgba(0, 212, 255, ${alpha})`;
	ctx.fillRect(startX + j * cellSize + 2, startY + i * cellSize + 2, cellSize - 4, cellSize - 4);

	ctx.fillStyle = '#e4e6eb';
	ctx.font = '10px Arial';
	ctx.fillText(animatedWeight.toFixed(2), startX + j * cellSize + cellSize/2, startY + i * cellSize + cellSize/2 + 4);
	}
	}
	}

	// Animated gradient flow for backprop
	function drawAnimatedGradientFlow(ctx, canvas, time) {
	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Backpropagation - Gradient Flow', canvas.width / 2, 30);

	const layers = [2, 4, 4, 1];
	const layerWidth = canvas.width / (layers.length + 1);

	// Forward pass (left to right) - blue
	const forwardProgress = (time / 2000) % 1;

	layers.forEach((neurons, layerIdx) => {
	const x = (layerIdx + 1) * layerWidth;
	const layerHeight = canvas.height / (neurons + 1);

	for (let i = 0; i < neurons; i++) {
	const y = (i + 1) * layerHeight;

	// Pulse effect based on forward pass
	const isActive = forwardProgress > layerIdx / layers.length;
	const radius = isActive ? 15 + Math.sin(time / 200) * 3 : 12;

	ctx.fillStyle = isActive ? '#00d4ff' : 'rgba(0, 212, 255, 0.3)';
	ctx.beginPath();
	ctx.arc(x, y, radius, 0, Math.PI * 2);
	ctx.fill();
	}
	});

	// Backward pass (right to left) - orange/red gradients
	const backwardProgress = ((time / 2000) + 0.5) % 1;

	for (let layerIdx = layers.length - 2; layerIdx >= 0; layerIdx--) {
	const x1 = (layerIdx + 1) * layerWidth;
	const x2 = (layerIdx + 2) * layerWidth;
	const gradientActive = backwardProgress > (layers.length - 2 - layerIdx) / (layers.length - 1);

	if (gradientActive) {
	const gradX = x2 - (x2 - x1) * ((backwardProgress * (layers.length - 1)) % 1);
	ctx.fillStyle = '#ff6b35';
	ctx.beginPath();
	ctx.arc(gradX, canvas.height / 2, 8, 0, Math.PI * 2);
	ctx.fill();
	}
	}

	ctx.fillStyle = '#e4e6eb';
	ctx.font = '12px Arial';
	ctx.fillText('Forward: Blue → \| Backward: Orange ←', canvas.width / 2, canvas.height - 20);
	}

	// Animated network for nn-basics
	function drawAnimatedNetwork(ctx, canvas, time) {
	drawDefaultAnimation(ctx, canvas, time);
	}

	// Animated decision boundary for perceptron
	function drawAnimatedDecisionBoundary(ctx, canvas, time) {
	const centerX = canvas.width / 2;
	const centerY = canvas.height / 2;

	ctx.fillStyle = '#ff6b35';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Perceptron Decision Boundary', canvas.width / 2, 30);

	// Animated rotating decision boundary
	const angle = time / 2000;
	const length = 200;

	ctx.strokeStyle = '#ff6b35';
	ctx.lineWidth = 3;
	ctx.beginPath();
	ctx.moveTo(centerX - Math.cos(angle) * length, centerY - Math.sin(angle) * length);
	ctx.lineTo(centerX + Math.cos(angle) * length, centerY + Math.sin(angle) * length);
	ctx.stroke();

	// Fixed sample points
	const points = [
	{x: 100, y: 80, c: 1}, {x: 150, y: 100, c: 1}, {x: 120, y: 150, c: 1},
	{x: 400, y: 200, c: 0}, {x: 450, y: 180, c: 0}, {x: 380, y: 250, c: 0}
	];

	points.forEach(p => {
	ctx.fillStyle = p.c === 1 ? '#00d4ff' : '#00ff88';
	ctx.beginPath();
	ctx.arc(p.x, p.y, 8, 0, Math.PI * 2);
	ctx.fill();
	});
	}

	function drawAnimatedMLP(ctx, canvas, time) {
	drawDefaultAnimation(ctx, canvas, time);
	}

	function drawAnimatedActivations(ctx, canvas, time) {
	drawActivationFunctions(ctx, canvas);

	// Add animated input marker
	const x = Math.sin(time / 500) * 4;
	const centerX = canvas.width / 2;
	const centerY = canvas.height / 2;
	const scale = 40;

	ctx.fillStyle = '#ffffff';
	ctx.beginPath();
	ctx.arc(centerX + x * scale, centerY, 6, 0, Math.PI * 2);
	ctx.fill();

	ctx.strokeStyle = '#ffffff';
	ctx.setLineDash([5, 5]);
	ctx.beginPath();
	ctx.moveTo(centerX + x * scale, 0);
	ctx.lineTo(centerX + x * scale, canvas.height);
	ctx.stroke();
	ctx.setLineDash([]);
	}

	function drawAnimatedConvolution(ctx, canvas, time) {
	drawConvolutionAnimation(ctx, canvas);
	}

	function drawAnimatedGAN(ctx, canvas, time) {
	ctx.fillStyle = '#ffaa00';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('GAN Training Animation', canvas.width / 2, 30);

	const phase = Math.floor(time / 1000) % 4;

	// Generator
	ctx.fillStyle = phase <= 1 ? '#00ff88' : 'rgba(0, 255, 136, 0.3)';
	ctx.fillRect(50, 100, 100, 80);
	ctx.fillStyle = '#e4e6eb';
	ctx.font = '12px Arial';
	ctx.fillText('Generator', 100, 145);

	// Fake image
	const noiseToFake = Math.sin(time / 300) * 0.5 + 0.5;
	ctx.fillStyle = `rgba(255, 170, 0, ${noiseToFake})`;
	ctx.fillRect(200, 110, 60, 60);
	ctx.fillStyle = '#e4e6eb';
	ctx.fillText('Fake', 230, 200);

	// Discriminator
	ctx.fillStyle = phase >= 2 ? '#ff6b35' : 'rgba(255, 107, 53, 0.3)';
	ctx.fillRect(320, 100, 100, 80);
	ctx.fillStyle = '#e4e6eb';
	ctx.fillText('Discriminator', 370, 145);

	// Output
	const output = phase === 3 ? 'Real?' : 'Fake?';
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 14px Arial';
	ctx.fillText(output, 370, 220);

	// Arrows
	ctx.strokeStyle = '#e4e6eb';
	ctx.lineWidth = 2;
	ctx.beginPath();
	ctx.moveTo(150, 140);
	ctx.lineTo(200, 140);
	ctx.stroke();
	ctx.beginPath();
	ctx.moveTo(260, 140);
	ctx.lineTo(320, 140);
	ctx.stroke();
	}

	function drawAnimatedDiffusion(ctx, canvas, time) {
	ctx.fillStyle = '#9900ff';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Diffusion Process Animation', canvas.width / 2, 30);

	const steps = 5;
	const stepWidth = canvas.width / (steps + 1);

	const progress = (time / 3000) % 1;
	const currentStep = Math.floor(progress * steps);

	for (let i = 0; i < steps; i++) {
	const x = (i + 1) * stepWidth;
	const y = 150;
	const noiseLevel = i / (steps - 1);
	const isActive = i <= currentStep;

	// Draw square with noise
	ctx.fillStyle = isActive ? '#9900ff' : 'rgba(153, 0, 255, 0.3)';
	ctx.fillRect(x - 30, y - 30, 60, 60);

	// Add noise dots
	if (noiseLevel > 0) {
	for (let j = 0; j < noiseLevel * 20; j++) {
	const nx = x - 25 + Math.random() * 50;
	const ny = y - 25 + Math.random() * 50;
	ctx.fillStyle = 'rgba(255, 255, 255, 0.5)';
	ctx.fillRect(nx, ny, 2, 2);
	}
	}

	ctx.fillStyle = '#e4e6eb';
	ctx.font = '10px Arial';
	ctx.fillText(`t=${i}`, x, y + 50);
	}

	ctx.fillStyle = '#e4e6eb';
	ctx.font = '12px Arial';
	ctx.fillText('Clean → Noisy (Forward) \| Noisy → Clean (Reverse)', canvas.width / 2, canvas.height - 20);
	}

	function drawAnimatedRNN(ctx, canvas, time) {
	ctx.fillStyle = '#00d4ff';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('RNN Unrolled Through Time', canvas.width / 2, 30);

	const steps = 5;
	const stepWidth = canvas.width / (steps + 1);
	const progress = (time / 500) % steps;
	const activeStep = Math.floor(progress);

	for (let i = 0; i < steps; i++) {
	const x = (i + 1) * stepWidth;
	const y = 150;
	const isActive = i === activeStep;

	// Hidden state
	ctx.fillStyle = isActive ? '#00d4ff' : 'rgba(0, 212, 255, 0.3)';
	ctx.beginPath();
	ctx.arc(x, y, 25, 0, Math.PI * 2);
	ctx.fill();

	ctx.fillStyle = '#e4e6eb';
	ctx.font = '10px Arial';
	ctx.fillText(`h${i}`, x, y + 4);

	// Input arrow
	ctx.strokeStyle = isActive ? '#00ff88' : 'rgba(0, 255, 136, 0.3)';
	ctx.lineWidth = 2;
	ctx.beginPath();
	ctx.moveTo(x, y + 60);
	ctx.lineTo(x, y + 25);
	ctx.stroke();
	ctx.fillText(`x${i}`, x, y + 75);

	// Recurrent connection
	if (i < steps - 1) {
	ctx.strokeStyle = isActive ? '#ff6b35' : 'rgba(255, 107, 53, 0.3)';
	ctx.beginPath();
	ctx.moveTo(x + 25, y);
	ctx.lineTo(x + stepWidth - 25, y);
	ctx.stroke();

	// Animated signal
	if (isActive) {
	const signalX = x + 25 + (stepWidth - 50) * (progress % 1);
	ctx.fillStyle = '#ff6b35';
	ctx.beginPath();
	ctx.arc(signalX, y, 5, 0, Math.PI * 2);
	ctx.fill();
	}
	}
	}
	}

	function downloadViz(moduleId) {
	const canvas = document.getElementById(moduleId + '-canvas');
	if (!canvas) return;

	const link = document.createElement('a');
	link.href = canvas.toDataURL('image/png');
	link.download = moduleId + '-visualization.png';
	link.click();
	}

	function drawGraphNetwork(ctx, canvas) {
	ctx.fillStyle = '#9900ff';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Graph Structure & Message Passing', canvas.width / 2, 30);

	const nodes = [
	{ x: 100, y: 100 }, { x: 200, y: 50 }, { x: 300, y: 150 },
	{ x: 150, y: 250 }, { x: 400, y: 100 }, { x: 500, y: 200 }
	];
	const edges = [
	[0, 1], [0, 3], [1, 2], [1, 4], [2, 3], [2, 4], [4, 5]
	];

	// Draw edges
	ctx.strokeStyle = 'rgba(153, 0, 255, 0.4)';
	ctx.lineWidth = 2;
	edges.forEach(e => {
	ctx.beginPath();
	ctx.moveTo(nodes[e[0]].x, nodes[e[0]].y);
	ctx.lineTo(nodes[e[1]].x, nodes[e[1]].y);
	ctx.stroke();
	});

	// Draw nodes
	nodes.forEach((n, i) => {
	ctx.fillStyle = '#9900ff';
	ctx.beginPath();
	ctx.arc(n.x, n.y, 15, 0, Math.PI * 2);
	ctx.fill();
	ctx.fillStyle = 'white';
	ctx.font = '12px Arial';
	ctx.fillText(i, n.x, n.y + 4);
	});

	// Draw Message Passing Animation (fake)
	const t = (Date.now() / 1000) % 2;
	if (t > 1) {
	ctx.strokeStyle = '#00ff88';
	ctx.lineWidth = 4;
	edges.forEach((e, idx) => {
	if (idx % 2 === 0) {
	ctx.beginPath();
	ctx.moveTo(nodes[e[0]].x, nodes[e[0]].y);
	ctx.lineTo(nodes[e[1]].x, nodes[e[1]].y);
	ctx.stroke();
	}
	});
	}
	}

	function drawGNNMath(ctx, canvas) {
	ctx.fillStyle = '#9900ff';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('Graph Convolution Math', canvas.width / 2, 50);

	ctx.fillStyle = '#e4e6eb';
	ctx.font = '14px Courier New';
	ctx.fillText('H(l+1) = σ(D^-½ A D^-½ H(l) W(l))', canvas.width / 2, 100);

	ctx.fillStyle = '#00ff88';
	ctx.fillText('A = Neighborhood Connections', canvas.width / 2, 150);
	ctx.fillStyle = '#ff6b35';
	ctx.fillText('D = Normalization Factor', canvas.width / 2, 180);
	}

	function drawGNNApplications(ctx, canvas) {
	ctx.fillStyle = '#9900ff';
	ctx.font = 'bold 16px Arial';
	ctx.textAlign = 'center';
	ctx.fillText('💊 Drug Discovery (Molecular Graphs)', canvas.width / 2, 60);

	ctx.fillStyle = '#00d4ff';
	ctx.fillText('🚗 Traffic Flow Prediction', canvas.width / 2, 120);

	ctx.fillStyle = '#ff6b35';
	ctx.fillText('🛒 Pinterest/Amazon Recommendations', canvas.width / 2, 180);
	}

	initDashboard();
	</script>
	</body>

	</html>