Spaces:
Running
Running
Commit ·
8e8f68a
1
Parent(s): d65393f
feat: add Vector DB, RAG, and Advanced LLM modules to Deep Learning hub
Browse files- DeepLearning/index.html +829 -1
- index.html +2 -1
- shared/js/search.js +4 -1
DeepLearning/index.html
CHANGED
|
@@ -757,6 +757,31 @@
|
|
| 757 |
category: "Advanced",
|
| 758 |
color: "#9900ff",
|
| 759 |
description: "Curated collection of seminal deep learning papers"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 760 |
}
|
| 761 |
];
|
| 762 |
|
|
@@ -5195,9 +5220,568 @@ output, attn_weights = mha(x, x, x) <span style="color: #6c7086;"># Self-attent
|
|
| 5195 |
<div class="box-content">Pinterest (PinSage), User-Item graphs</div>
|
| 5196 |
</div>
|
| 5197 |
`
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5198 |
}
|
| 5199 |
};
|
| 5200 |
|
|
|
|
| 5201 |
function createModuleHTML(module) {
|
| 5202 |
const content = MODULE_CONTENT[module.id] || {};
|
| 5203 |
|
|
@@ -5417,7 +6001,10 @@ output, attn_weights = mha(x, x, x) <span style="color: #6c7086;"># Self-attent
|
|
| 5417 |
'vit': drawVisionTransformer,
|
| 5418 |
'gnn': drawGraphNetwork,
|
| 5419 |
'seq2seq': drawSeq2SeqAttention,
|
| 5420 |
-
'research-papers': drawDefaultVisualization
|
|
|
|
|
|
|
|
|
|
| 5421 |
};
|
| 5422 |
|
| 5423 |
if (vizMap[moduleId]) {
|
|
@@ -7089,6 +7676,247 @@ output, attn_weights = mha(x, x, x) <span style="color: #6c7086;"># Self-attent
|
|
| 7089 |
ctx.fillText('🛒 Pinterest/Amazon Recommendations', canvas.width / 2, 180);
|
| 7090 |
}
|
| 7091 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7092 |
initDashboard();
|
| 7093 |
</script>
|
| 7094 |
</body>
|
|
|
|
| 757 |
category: "Advanced",
|
| 758 |
color: "#9900ff",
|
| 759 |
description: "Curated collection of seminal deep learning papers"
|
| 760 |
+
},
|
| 761 |
+
// Module 7: GenAI & LLM Engineering
|
| 762 |
+
{
|
| 763 |
+
id: "vector-db",
|
| 764 |
+
title: "Vector Databases",
|
| 765 |
+
icon: "🧲",
|
| 766 |
+
category: "GenAI",
|
| 767 |
+
color: "#00c9a7",
|
| 768 |
+
description: "Embeddings, similarity search, FAISS, Pinecone, ChromaDB"
|
| 769 |
+
},
|
| 770 |
+
{
|
| 771 |
+
id: "rag",
|
| 772 |
+
title: "RAG Pipelines",
|
| 773 |
+
icon: "🔗",
|
| 774 |
+
category: "GenAI",
|
| 775 |
+
color: "#00c9a7",
|
| 776 |
+
description: "Retrieval-Augmented Generation for grounded AI"
|
| 777 |
+
},
|
| 778 |
+
{
|
| 779 |
+
id: "advanced-llm",
|
| 780 |
+
title: "Fine-Tuning & Quantization",
|
| 781 |
+
icon: "⚙️",
|
| 782 |
+
category: "GenAI",
|
| 783 |
+
color: "#00c9a7",
|
| 784 |
+
description: "LoRA, QLoRA, PEFT, GGUF, and deployment strategies"
|
| 785 |
}
|
| 786 |
];
|
| 787 |
|
|
|
|
| 5220 |
<div class="box-content">Pinterest (PinSage), User-Item graphs</div>
|
| 5221 |
</div>
|
| 5222 |
`
|
| 5223 |
+
},
|
| 5224 |
+
"vector-db": {
|
| 5225 |
+
overview: `
|
| 5226 |
+
<h3>What are Vector Databases?</h3>
|
| 5227 |
+
<p>Vector databases store data as <strong>high-dimensional numerical vectors</strong> (embeddings) and enable <strong>similarity search</strong> instead of exact-match queries. They are the backbone of modern AI applications like semantic search, recommendation systems, and RAG pipelines.</p>
|
| 5228 |
+
|
| 5229 |
+
<h3>Why Do We Need Them?</h3>
|
| 5230 |
+
<ul>
|
| 5231 |
+
<li><strong>Semantic Understanding:</strong> Traditional databases search by keywords; vector DBs search by meaning</li>
|
| 5232 |
+
<li><strong>AI-Native:</strong> LLMs and neural networks produce embeddings — vector DBs store and query them</li>
|
| 5233 |
+
<li><strong>Scale:</strong> Billions of vectors with sub-millisecond query times using approximate nearest neighbor (ANN) algorithms</li>
|
| 5234 |
+
<li><strong>Multi-Modal:</strong> Store text, image, audio, and video embeddings in the same space</li>
|
| 5235 |
+
</ul>
|
| 5236 |
+
|
| 5237 |
+
<h3>Vector DB Landscape</h3>
|
| 5238 |
+
<table>
|
| 5239 |
+
<tr>
|
| 5240 |
+
<th>Database</th>
|
| 5241 |
+
<th>Type</th>
|
| 5242 |
+
<th>Best For</th>
|
| 5243 |
+
<th>Index</th>
|
| 5244 |
+
</tr>
|
| 5245 |
+
<tr>
|
| 5246 |
+
<td>FAISS</td>
|
| 5247 |
+
<td>Library</td>
|
| 5248 |
+
<td>Research, local use</td>
|
| 5249 |
+
<td>IVF, HNSW, PQ</td>
|
| 5250 |
+
</tr>
|
| 5251 |
+
<tr>
|
| 5252 |
+
<td>Pinecone</td>
|
| 5253 |
+
<td>Managed SaaS</td>
|
| 5254 |
+
<td>Production, low-ops</td>
|
| 5255 |
+
<td>Proprietary</td>
|
| 5256 |
+
</tr>
|
| 5257 |
+
<tr>
|
| 5258 |
+
<td>ChromaDB</td>
|
| 5259 |
+
<td>Open-Source</td>
|
| 5260 |
+
<td>Prototyping, local RAG</td>
|
| 5261 |
+
<td>HNSW</td>
|
| 5262 |
+
</tr>
|
| 5263 |
+
<tr>
|
| 5264 |
+
<td>Weaviate</td>
|
| 5265 |
+
<td>Open-Source</td>
|
| 5266 |
+
<td>Hybrid search</td>
|
| 5267 |
+
<td>HNSW + BM25</td>
|
| 5268 |
+
</tr>
|
| 5269 |
+
<tr>
|
| 5270 |
+
<td>Milvus</td>
|
| 5271 |
+
<td>Open-Source</td>
|
| 5272 |
+
<td>Enterprise scale</td>
|
| 5273 |
+
<td>IVF, DiskANN</td>
|
| 5274 |
+
</tr>
|
| 5275 |
+
<tr>
|
| 5276 |
+
<td>Qdrant</td>
|
| 5277 |
+
<td>Open-Source</td>
|
| 5278 |
+
<td>Filtering + search</td>
|
| 5279 |
+
<td>HNSW</td>
|
| 5280 |
+
</tr>
|
| 5281 |
+
</table>
|
| 5282 |
+
|
| 5283 |
+
<div class="callout tip">
|
| 5284 |
+
<div class="callout-title">💡 Key Insight</div>
|
| 5285 |
+
Traditional DB: "Find all users named John"<br>
|
| 5286 |
+
Vector DB: "Find all users <strong>similar to</strong> this profile" — completely different paradigm!
|
| 5287 |
+
</div>
|
| 5288 |
+
`,
|
| 5289 |
+
concepts: `
|
| 5290 |
+
<h3>Core Concepts</h3>
|
| 5291 |
+
<div class="list-item">
|
| 5292 |
+
<div class="list-num">01</div>
|
| 5293 |
+
<div><strong>Embeddings:</strong> Dense numerical representations of data (text, images, etc.) produced by neural networks. Example: OpenAI's text-embedding-3-small produces 1536-dim vectors.</div>
|
| 5294 |
+
</div>
|
| 5295 |
+
<div class="list-item">
|
| 5296 |
+
<div class="list-num">02</div>
|
| 5297 |
+
<div><strong>Similarity Metrics:</strong> Cosine similarity (angle), Euclidean distance (L2), Dot product (magnitude-aware). Cosine is most common for text embeddings.</div>
|
| 5298 |
+
</div>
|
| 5299 |
+
<div class="list-item">
|
| 5300 |
+
<div class="list-num">03</div>
|
| 5301 |
+
<div><strong>ANN Algorithms:</strong> Approximate Nearest Neighbor — trade small accuracy loss for massive speed gains (exact search is O(n), ANN is O(log n)).</div>
|
| 5302 |
+
</div>
|
| 5303 |
+
<div class="list-item">
|
| 5304 |
+
<div class="list-num">04</div>
|
| 5305 |
+
<div><strong>HNSW (Hierarchical Navigable Small World):</strong> Graph-based index. Builds a multi-layer graph where higher layers have fewer, long-range connections. Most popular algorithm.</div>
|
| 5306 |
+
</div>
|
| 5307 |
+
<div class="list-item">
|
| 5308 |
+
<div class="list-num">05</div>
|
| 5309 |
+
<div><strong>IVF (Inverted File Index):</strong> Clusters vectors first (like k-means), then only searches relevant clusters. Fast but requires training step.</div>
|
| 5310 |
+
</div>
|
| 5311 |
+
<div class="list-item">
|
| 5312 |
+
<div class="list-num">06</div>
|
| 5313 |
+
<div><strong>Product Quantization (PQ):</strong> Compresses vectors by splitting them into sub-vectors and quantizing each. Reduces memory by 10-100x.</div>
|
| 5314 |
+
</div>
|
| 5315 |
+
|
| 5316 |
+
<h3>Embedding Pipeline</h3>
|
| 5317 |
+
<div class="formula">
|
| 5318 |
+
1. Raw Data → Embedding Model → Vector (e.g., [0.12, -0.34, 0.56, ...])<br>
|
| 5319 |
+
2. Store vector + metadata in Vector DB<br>
|
| 5320 |
+
3. Query: Convert query → vector → Find nearest neighbors<br>
|
| 5321 |
+
4. Return top-k most similar results with scores
|
| 5322 |
+
</div>
|
| 5323 |
+
|
| 5324 |
+
<div class="callout warning">
|
| 5325 |
+
<div class="callout-title">⚠️ Common Pitfalls</div>
|
| 5326 |
+
• Using wrong distance metric (cosine for normalized, L2 for raw)<br>
|
| 5327 |
+
• Not chunking documents properly (too large = diluted embedding)<br>
|
| 5328 |
+
• Mixing embedding models (query and docs must use same model)<br>
|
| 5329 |
+
• Ignoring metadata filtering (combine vector search + filters)
|
| 5330 |
+
</div>
|
| 5331 |
+
`,
|
| 5332 |
+
math: `
|
| 5333 |
+
<h3>📐 Paper & Pain: Vector Similarity Mathematics</h3>
|
| 5334 |
+
|
| 5335 |
+
<h4>Cosine Similarity</h4>
|
| 5336 |
+
<div class="formula" style="font-size: 1.2rem; text-align: center; margin: 20px 0; background: rgba(0, 212, 255, 0.08); padding: 25px; border-radius: 8px;">
|
| 5337 |
+
<strong>cos(A, B) = (A · B) / (||A|| × ||B||)</strong>
|
| 5338 |
+
</div>
|
| 5339 |
+
|
| 5340 |
+
<div class="callout insight">
|
| 5341 |
+
<div class="callout-title">📝 Manual Calculation</div>
|
| 5342 |
+
<strong>A = [1, 2, 3], B = [4, 5, 6]</strong><br><br>
|
| 5343 |
+
<strong>Step 1 — Dot Product:</strong><br>
|
| 5344 |
+
A · B = (1×4) + (2×5) + (3×6) = 4 + 10 + 18 = <strong>32</strong><br><br>
|
| 5345 |
+
<strong>Step 2 — Magnitudes:</strong><br>
|
| 5346 |
+
||A|| = √(1² + 2² + 3²) = √14 ≈ 3.742<br>
|
| 5347 |
+
||B|| = √(4² + 5² + 6²) = √77 ≈ 8.775<br><br>
|
| 5348 |
+
<strong>Step 3 — Cosine Similarity:</strong><br>
|
| 5349 |
+
cos(A, B) = 32 / (3.742 × 8.775) = 32 / 32.833 ≈ <strong>0.9746</strong><br><br>
|
| 5350 |
+
<strong>Interpretation:</strong> Very high similarity (close to 1.0 = identical direction)
|
| 5351 |
+
</div>
|
| 5352 |
+
|
| 5353 |
+
<h4>Euclidean Distance (L2)</h4>
|
| 5354 |
+
<div class="formula">
|
| 5355 |
+
d(A, B) = √(Σ(aᵢ - bᵢ)²)<br><br>
|
| 5356 |
+
For A = [1, 2, 3], B = [4, 5, 6]:<br>
|
| 5357 |
+
d = √((4-1)² + (5-2)² + (6-3)²) = √(9 + 9 + 9) = √27 ≈ <strong>5.196</strong>
|
| 5358 |
+
</div>
|
| 5359 |
+
|
| 5360 |
+
<h4>HNSW Complexity</h4>
|
| 5361 |
+
<div class="formula">
|
| 5362 |
+
Build time: O(N × log(N))<br>
|
| 5363 |
+
Query time: O(log(N)) — logarithmic!<br>
|
| 5364 |
+
Memory: O(N × M) where M = max connections per node<br><br>
|
| 5365 |
+
Compare to brute force: O(N) per query<br>
|
| 5366 |
+
For N = 1 billion vectors: HNSW is ~30 orders of magnitude faster
|
| 5367 |
+
</div>
|
| 5368 |
+
`,
|
| 5369 |
+
applications: `
|
| 5370 |
+
<h3>Real-World Applications</h3>
|
| 5371 |
+
<div class="info-box">
|
| 5372 |
+
<div class="box-title">🔍 Semantic Search</div>
|
| 5373 |
+
<div class="box-content">
|
| 5374 |
+
<strong>Google, Bing, Notion AI:</strong> Search by meaning, not keywords<br>
|
| 5375 |
+
<strong>Example:</strong> "How to fix my car" matches "automobile repair guide" even with zero keyword overlap
|
| 5376 |
+
</div>
|
| 5377 |
+
</div>
|
| 5378 |
+
<div class="info-box">
|
| 5379 |
+
<div class="box-title">🤖 RAG (Retrieval-Augmented Generation)</div>
|
| 5380 |
+
<div class="box-content">
|
| 5381 |
+
Store knowledge base as vectors → retrieve relevant context → feed to LLM for grounded answers. Used by ChatGPT with browsing, Perplexity AI, and enterprise AI assistants.
|
| 5382 |
+
</div>
|
| 5383 |
+
</div>
|
| 5384 |
+
<div class="info-box">
|
| 5385 |
+
<div class="box-title">🛒 Recommendation Systems</div>
|
| 5386 |
+
<div class="box-content">
|
| 5387 |
+
<strong>Spotify (music):</strong> Embed songs as vectors, recommend nearest neighbors<br>
|
| 5388 |
+
<strong>Netflix (movies):</strong> User + content embeddings for personalization
|
| 5389 |
+
</div>
|
| 5390 |
+
</div>
|
| 5391 |
+
<div class="info-box">
|
| 5392 |
+
<div class="box-title">🖼️ Image & Multi-Modal Search</div>
|
| 5393 |
+
<div class="box-content">
|
| 5394 |
+
<strong>CLIP embeddings:</strong> Search images with text queries and vice versa<br>
|
| 5395 |
+
<strong>Google Lens:</strong> Find products by uploading a photo
|
| 5396 |
+
</div>
|
| 5397 |
+
</div>
|
| 5398 |
+
`
|
| 5399 |
+
},
|
| 5400 |
+
"rag": {
|
| 5401 |
+
overview: `
|
| 5402 |
+
<h3>Retrieval-Augmented Generation (RAG)</h3>
|
| 5403 |
+
<p>RAG combines the <strong>reasoning power of LLMs</strong> with <strong>external knowledge retrieval</strong> to produce accurate, grounded, and up-to-date responses. It solves the fundamental problem of LLM hallucination by anchoring generation in real data.</p>
|
| 5404 |
+
|
| 5405 |
+
<h3>Why RAG?</h3>
|
| 5406 |
+
<ul>
|
| 5407 |
+
<li><strong>Reduces Hallucination:</strong> LLMs generate from training data; RAG retrieves from your actual documents</li>
|
| 5408 |
+
<li><strong>Always Up-to-Date:</strong> No need to retrain the model when knowledge changes — just update the vector store</li>
|
| 5409 |
+
<li><strong>Domain-Specific:</strong> Works with proprietary data (internal docs, codebases, policies)</li>
|
| 5410 |
+
<li><strong>Cost-Effective:</strong> Much cheaper than fine-tuning for knowledge injection</li>
|
| 5411 |
+
<li><strong>Verifiable:</strong> Retrieved sources can be cited, enabling fact-checking</li>
|
| 5412 |
+
</ul>
|
| 5413 |
+
|
| 5414 |
+
<div class="callout tip">
|
| 5415 |
+
<div class="callout-title">💡 RAG vs Fine-Tuning</div>
|
| 5416 |
+
<strong>Fine-tuning:</strong> Changes the model's behavior/style (teach it HOW to respond)<br>
|
| 5417 |
+
<strong>RAG:</strong> Changes the model's knowledge (give it WHAT to respond with)<br><br>
|
| 5418 |
+
Rule of thumb: Use RAG for knowledge, fine-tuning for behavior. Combine both for production systems.
|
| 5419 |
+
</div>
|
| 5420 |
+
|
| 5421 |
+
<h3>The RAG Pipeline</h3>
|
| 5422 |
+
<div class="formula">
|
| 5423 |
+
<strong>Indexing Phase (Offline):</strong><br>
|
| 5424 |
+
Documents → Chunk → Embed → Store in Vector DB<br><br>
|
| 5425 |
+
<strong>Query Phase (Online):</strong><br>
|
| 5426 |
+
User Query → Embed → Retrieve top-k chunks → Context + Query → LLM → Answer
|
| 5427 |
+
</div>
|
| 5428 |
+
|
| 5429 |
+
<div class="callout warning">
|
| 5430 |
+
<div class="callout-title">⚠️ RAG Failure Modes</div>
|
| 5431 |
+
• <strong>Poor chunking:</strong> Chunks too large (diluted) or too small (missing context)<br>
|
| 5432 |
+
• <strong>Retrieval misses:</strong> Relevant docs not in top-k results<br>
|
| 5433 |
+
• <strong>Lost in the middle:</strong> LLMs ignore context in the middle of long prompts<br>
|
| 5434 |
+
• <strong>Outdated embeddings:</strong> Vector store not synced with source documents
|
| 5435 |
+
</div>
|
| 5436 |
+
`,
|
| 5437 |
+
concepts: `
|
| 5438 |
+
<h3>RAG Architecture Deep Dive</h3>
|
| 5439 |
+
|
| 5440 |
+
<h4>1. Document Processing</h4>
|
| 5441 |
+
<div class="list-item">
|
| 5442 |
+
<div class="list-num">01</div>
|
| 5443 |
+
<div><strong>Loading:</strong> Parse PDFs, HTML, Markdown, Databases, APIs using document loaders (LangChain, LlamaIndex)</div>
|
| 5444 |
+
</div>
|
| 5445 |
+
<div class="list-item">
|
| 5446 |
+
<div class="list-num">02</div>
|
| 5447 |
+
<div><strong>Chunking Strategies:</strong><br>
|
| 5448 |
+
• <strong>Fixed-size:</strong> Split every N tokens (simplest, often good enough)<br>
|
| 5449 |
+
• <strong>Recursive:</strong> Split by paragraphs → sentences → words (preserves structure)<br>
|
| 5450 |
+
• <strong>Semantic:</strong> Use embeddings to detect topic boundaries<br>
|
| 5451 |
+
• <strong>Overlap:</strong> Add 10-20% overlap between chunks to preserve context
|
| 5452 |
+
</div>
|
| 5453 |
+
</div>
|
| 5454 |
+
<div class="list-item">
|
| 5455 |
+
<div class="list-num">03</div>
|
| 5456 |
+
<div><strong>Embedding:</strong> Convert chunks to vectors using models like OpenAI text-embedding-3-small (1536-dim), Cohere embed-v3, or open-source models like BGE-large</div>
|
| 5457 |
+
</div>
|
| 5458 |
+
|
| 5459 |
+
<h4>2. Retrieval Strategies</h4>
|
| 5460 |
+
<div class="list-item">
|
| 5461 |
+
<div class="list-num">04</div>
|
| 5462 |
+
<div><strong>Dense Retrieval:</strong> Cosine similarity on embeddings (semantic search)</div>
|
| 5463 |
+
</div>
|
| 5464 |
+
<div class="list-item">
|
| 5465 |
+
<div class="list-num">05</div>
|
| 5466 |
+
<div><strong>Sparse Retrieval:</strong> BM25/TF-IDF keyword matching (exact term matching)</div>
|
| 5467 |
+
</div>
|
| 5468 |
+
<div class="list-item">
|
| 5469 |
+
<div class="list-num">06</div>
|
| 5470 |
+
<div><strong>Hybrid Search:</strong> Combine dense + sparse with Reciprocal Rank Fusion (RRF) — usually best results</div>
|
| 5471 |
+
</div>
|
| 5472 |
+
|
| 5473 |
+
<h4>3. Advanced RAG Patterns</h4>
|
| 5474 |
+
<div class="list-item">
|
| 5475 |
+
<div class="list-num">07</div>
|
| 5476 |
+
<div><strong>Multi-Query RAG:</strong> LLM generates multiple query variations → retrieve for each → merge results</div>
|
| 5477 |
+
</div>
|
| 5478 |
+
<div class="list-item">
|
| 5479 |
+
<div class="list-num">08</div>
|
| 5480 |
+
<div><strong>Re-ranking:</strong> Use a cross-encoder (e.g., Cohere Rerank) to re-score retrieved chunks for higher relevance</div>
|
| 5481 |
+
</div>
|
| 5482 |
+
<div class="list-item">
|
| 5483 |
+
<div class="list-num">09</div>
|
| 5484 |
+
<div><strong>Query Expansion:</strong> HyDE — generate a hypothetical answer, embed that, then retrieve</div>
|
| 5485 |
+
</div>
|
| 5486 |
+
<div class="list-item">
|
| 5487 |
+
<div class="list-num">10</div>
|
| 5488 |
+
<div><strong>Agentic RAG:</strong> LLM decides when and what to retrieve; can self-reflect and re-retrieve if initial results are poor</div>
|
| 5489 |
+
</div>
|
| 5490 |
+
|
| 5491 |
+
<div class="callout insight">
|
| 5492 |
+
<div class="callout-title">🔑 Production Checklist</div>
|
| 5493 |
+
✅ Chunk size 256-512 tokens with 10-20% overlap<br>
|
| 5494 |
+
✅ Use hybrid search (dense + BM25)<br>
|
| 5495 |
+
✅ Add re-ranker for precision<br>
|
| 5496 |
+
✅ Include metadata filtering (date, source, category)<br>
|
| 5497 |
+
✅ Monitor retrieval quality with evaluation metrics
|
| 5498 |
+
</div>
|
| 5499 |
+
`,
|
| 5500 |
+
math: `
|
| 5501 |
+
<h3>📐 RAG Evaluation Metrics</h3>
|
| 5502 |
+
|
| 5503 |
+
<h4>Retrieval Quality</h4>
|
| 5504 |
+
<div class="formula" style="font-size: 1.1rem; text-align: center; margin: 20px 0; background: rgba(0, 212, 255, 0.08); padding: 25px; border-radius: 8px;">
|
| 5505 |
+
<strong>Recall@k = |Relevant ∩ Retrieved@k| / |Relevant|</strong><br><br>
|
| 5506 |
+
<strong>Precision@k = |Relevant ∩ Retrieved@k| / k</strong><br><br>
|
| 5507 |
+
<strong>MRR = 1/|Q| × Σ(1 / rank_i)</strong>
|
| 5508 |
+
</div>
|
| 5509 |
+
|
| 5510 |
+
<div class="callout insight">
|
| 5511 |
+
<div class="callout-title">📝 Paper & Pain: MRR Calculation</div>
|
| 5512 |
+
<strong>3 queries, first relevant result at ranks 1, 3, 2:</strong><br><br>
|
| 5513 |
+
MRR = (1/3) × (1/1 + 1/3 + 1/2)<br>
|
| 5514 |
+
MRR = (1/3) × (1.0 + 0.333 + 0.5)<br>
|
| 5515 |
+
MRR = (1/3) × 1.833 = <strong>0.611</strong><br><br>
|
| 5516 |
+
<strong>Interpretation:</strong> On average, the first relevant result appears around position 1.6
|
| 5517 |
+
</div>
|
| 5518 |
+
|
| 5519 |
+
<h4>Reciprocal Rank Fusion (Hybrid Search)</h4>
|
| 5520 |
+
<div class="formula">
|
| 5521 |
+
<strong>RRF(d) = Σ 1 / (k + rank_i(d))</strong><br><br>
|
| 5522 |
+
Where k = 60 (constant), rank_i = rank from retriever i<br><br>
|
| 5523 |
+
Example: Document D appears at rank 2 in dense, rank 5 in sparse:<br>
|
| 5524 |
+
RRF(D) = 1/(60+2) + 1/(60+5) = 0.0161 + 0.0154 = <strong>0.0315</strong>
|
| 5525 |
+
</div>
|
| 5526 |
+
|
| 5527 |
+
<h4>Chunking Size Impact</h4>
|
| 5528 |
+
<div class="formula">
|
| 5529 |
+
Optimal chunk size depends on:<br>
|
| 5530 |
+
• Embedding model context window (usually 512 tokens max)<br>
|
| 5531 |
+
• Query specificity (specific → smaller chunks)<br>
|
| 5532 |
+
• Document structure (code → function-level, prose → paragraph-level)<br><br>
|
| 5533 |
+
Rule of thumb: <strong>chunk_size ≈ 2-3× expected query length</strong>
|
| 5534 |
+
</div>
|
| 5535 |
+
`,
|
| 5536 |
+
applications: `
|
| 5537 |
+
<h3>RAG in Production</h3>
|
| 5538 |
+
<div class="info-box">
|
| 5539 |
+
<div class="box-title">🔍 Perplexity AI</div>
|
| 5540 |
+
<div class="box-content">
|
| 5541 |
+
RAG-powered search engine: retrieves web pages → feeds to LLM → generates cited answers. Processes 100M+ queries/month.
|
| 5542 |
+
</div>
|
| 5543 |
+
</div>
|
| 5544 |
+
<div class="info-box">
|
| 5545 |
+
<div class="box-title">💼 Enterprise Knowledge Bases</div>
|
| 5546 |
+
<div class="box-content">
|
| 5547 |
+
<strong>Notion AI, Glean, Guru:</strong> Index company docs → answer employee questions with source citations<br>
|
| 5548 |
+
<strong>Legal AI:</strong> Retrieve relevant case law for legal research (Harvey AI)
|
| 5549 |
+
</div>
|
| 5550 |
+
</div>
|
| 5551 |
+
<div class="info-box">
|
| 5552 |
+
<div class="box-title">💻 Code Assistants</div>
|
| 5553 |
+
<div class="box-content">
|
| 5554 |
+
<strong>GitHub Copilot, Cursor:</strong> Retrieve relevant code from the codebase to inform suggestions<br>
|
| 5555 |
+
<strong>Documentation Q&A:</strong> Answer questions about SDKs and libraries
|
| 5556 |
+
</div>
|
| 5557 |
+
</div>
|
| 5558 |
+
<div class="info-box">
|
| 5559 |
+
<div class="box-title">🏥 Healthcare</div>
|
| 5560 |
+
<div class="box-content">
|
| 5561 |
+
Retrieve medical literature → generate evidence-based clinical summaries (Google Med-PaLM 2)
|
| 5562 |
+
</div>
|
| 5563 |
+
</div>
|
| 5564 |
+
|
| 5565 |
+
<div class="callout tip">
|
| 5566 |
+
<div class="callout-title">🛠️ Popular Frameworks</div>
|
| 5567 |
+
• <strong>LangChain:</strong> Most popular, modular pipeline builder<br>
|
| 5568 |
+
• <strong>LlamaIndex:</strong> Data-focused, great for document processing<br>
|
| 5569 |
+
• <strong>Haystack:</strong> Production-ready, NLP-first<br>
|
| 5570 |
+
• <strong>Semantic Kernel:</strong> Microsoft's enterprise RAG framework
|
| 5571 |
+
</div>
|
| 5572 |
+
`
|
| 5573 |
+
},
|
| 5574 |
+
"advanced-llm": {
|
| 5575 |
+
overview: `
|
| 5576 |
+
<h3>Fine-Tuning & Quantization for LLMs</h3>
|
| 5577 |
+
<p>Fine-tuning adapts a pre-trained LLM to a specific task or domain. Quantization reduces model size and inference cost. Together, they enable <strong>deploying powerful AI on limited hardware</strong>.</p>
|
| 5578 |
+
|
| 5579 |
+
<h3>The Fine-Tuning Spectrum</h3>
|
| 5580 |
+
<table>
|
| 5581 |
+
<tr>
|
| 5582 |
+
<th>Method</th>
|
| 5583 |
+
<th>Parameters Trained</th>
|
| 5584 |
+
<th>GPU Memory</th>
|
| 5585 |
+
<th>Best For</th>
|
| 5586 |
+
</tr>
|
| 5587 |
+
<tr>
|
| 5588 |
+
<td>Full Fine-Tuning</td>
|
| 5589 |
+
<td>100%</td>
|
| 5590 |
+
<td>Very High</td>
|
| 5591 |
+
<td>Fundamental behavior change</td>
|
| 5592 |
+
</tr>
|
| 5593 |
+
<tr>
|
| 5594 |
+
<td>LoRA</td>
|
| 5595 |
+
<td>0.1-1%</td>
|
| 5596 |
+
<td>Low</td>
|
| 5597 |
+
<td>Style/domain adaptation</td>
|
| 5598 |
+
</tr>
|
| 5599 |
+
<tr>
|
| 5600 |
+
<td>QLoRA</td>
|
| 5601 |
+
<td>0.1-1%</td>
|
| 5602 |
+
<td>Very Low</td>
|
| 5603 |
+
<td>Consumer GPU fine-tuning</td>
|
| 5604 |
+
</tr>
|
| 5605 |
+
<tr>
|
| 5606 |
+
<td>Prompt Tuning</td>
|
| 5607 |
+
<td>~0.01%</td>
|
| 5608 |
+
<td>Minimal</td>
|
| 5609 |
+
<td>Task-specific adapters</td>
|
| 5610 |
+
</tr>
|
| 5611 |
+
<tr>
|
| 5612 |
+
<td>RLHF / DPO</td>
|
| 5613 |
+
<td>Varies</td>
|
| 5614 |
+
<td>High</td>
|
| 5615 |
+
<td>Alignment & safety</td>
|
| 5616 |
+
</tr>
|
| 5617 |
+
</table>
|
| 5618 |
+
|
| 5619 |
+
<div class="callout tip">
|
| 5620 |
+
<div class="callout-title">💡 When to Fine-Tune vs RAG</div>
|
| 5621 |
+
<strong>Fine-tune when:</strong> You need to change HOW the model responds (tone, format, reasoning style)<br>
|
| 5622 |
+
<strong>Use RAG when:</strong> You need to change WHAT the model knows (add new facts/documents)<br>
|
| 5623 |
+
<strong>Both when:</strong> Production systems needing custom behavior + dynamic knowledge
|
| 5624 |
+
</div>
|
| 5625 |
+
|
| 5626 |
+
<div class="callout warning">
|
| 5627 |
+
<div class="callout-title">⚠️ Fine-Tuning Risks</div>
|
| 5628 |
+
• <strong>Catastrophic forgetting:</strong> Model loses general capabilities<br>
|
| 5629 |
+
• <strong>Overfitting:</strong> Small datasets → memorization instead of learning<br>
|
| 5630 |
+
• <strong>Alignment tax:</strong> Safety guardrails can be weakened<br>
|
| 5631 |
+
• <strong>Data quality:</strong> "Garbage in, garbage out" — bad examples = bad model
|
| 5632 |
+
</div>
|
| 5633 |
+
`,
|
| 5634 |
+
concepts: `
|
| 5635 |
+
<h3>LoRA (Low-Rank Adaptation)</h3>
|
| 5636 |
+
<p>Instead of updating all parameters, LoRA freezes the original weights and injects small trainable matrices into each layer.</p>
|
| 5637 |
+
|
| 5638 |
+
<div class="list-item">
|
| 5639 |
+
<div class="list-num">01</div>
|
| 5640 |
+
<div><strong>Core Idea:</strong> Weight update ΔW can be decomposed as a low-rank matrix: ΔW = B × A, where B ∈ ℝ^(d×r) and A ∈ ℝ^(r×d), with r ≪ d</div>
|
| 5641 |
+
</div>
|
| 5642 |
+
<div class="list-item">
|
| 5643 |
+
<div class="list-num">02</div>
|
| 5644 |
+
<div><strong>Memory Savings:</strong> For a 7B model, full fine-tuning needs ~28GB VRAM. LoRA (r=16) needs ~6GB — a 4.7x reduction.</div>
|
| 5645 |
+
</div>
|
| 5646 |
+
<div class="list-item">
|
| 5647 |
+
<div class="list-num">03</div>
|
| 5648 |
+
<div><strong>Rank (r):</strong> Typical values: r=8, 16, 32, 64. Higher rank = more expressiveness but more parameters.</div>
|
| 5649 |
+
</div>
|
| 5650 |
+
|
| 5651 |
+
<h3>QLoRA (Quantized LoRA)</h3>
|
| 5652 |
+
<div class="list-item">
|
| 5653 |
+
<div class="list-num">04</div>
|
| 5654 |
+
<div><strong>4-bit NormalFloat (NF4):</strong> Quantize frozen weights to 4-bit. Train LoRA adapters in fp16/bf16. Enables fine-tuning 65B models on a single 48GB GPU.</div>
|
| 5655 |
+
</div>
|
| 5656 |
+
<div class="list-item">
|
| 5657 |
+
<div class="list-num">05</div>
|
| 5658 |
+
<div><strong>Double Quantization:</strong> Quantize the quantization constants themselves — saves additional memory.</div>
|
| 5659 |
+
</div>
|
| 5660 |
+
|
| 5661 |
+
<h3>Quantization for Deployment</h3>
|
| 5662 |
+
<div class="list-item">
|
| 5663 |
+
<div class="list-num">06</div>
|
| 5664 |
+
<div><strong>GGUF (llama.cpp):</strong> CPU-optimized format. Run Llama-2 70B on a MacBook with Q4_K_M quantization.</div>
|
| 5665 |
+
</div>
|
| 5666 |
+
<div class="list-item">
|
| 5667 |
+
<div class="list-num">07</div>
|
| 5668 |
+
<div><strong>GPTQ:</strong> GPU-optimized post-training quantization. 3-4 bit with minimal quality loss.</div>
|
| 5669 |
+
</div>
|
| 5670 |
+
<div class="list-item">
|
| 5671 |
+
<div class="list-num">08</div>
|
| 5672 |
+
<div><strong>AWQ (Activation-aware):</strong> Preserves important weight channels. State-of-the-art quality at 4-bit.</div>
|
| 5673 |
+
</div>
|
| 5674 |
+
|
| 5675 |
+
<h3>RLHF & DPO</h3>
|
| 5676 |
+
<div class="list-item">
|
| 5677 |
+
<div class="list-num">09</div>
|
| 5678 |
+
<div><strong>RLHF:</strong> Train a reward model from human preferences, then use PPO to optimize the LLM's policy. Used by ChatGPT, Claude.</div>
|
| 5679 |
+
</div>
|
| 5680 |
+
<div class="list-item">
|
| 5681 |
+
<div class="list-num">10</div>
|
| 5682 |
+
<div><strong>DPO (Direct Preference Optimization):</strong> Skip the reward model entirely — optimize directly from preference pairs. Simpler, lower compute, often comparable results.</div>
|
| 5683 |
+
</div>
|
| 5684 |
+
|
| 5685 |
+
<div class="callout insight">
|
| 5686 |
+
<div class="callout-title">🔁 PEFT (Parameter-Efficient Fine-Tuning)</div>
|
| 5687 |
+
PEFT is the umbrella term for all methods that train <1% of parameters:<br>
|
| 5688 |
+
• <strong>LoRA / QLoRA:</strong> Low-rank weight decomposition<br>
|
| 5689 |
+
• <strong>Prefix Tuning:</strong> Trainable prefix tokens<br>
|
| 5690 |
+
• <strong>Adapters:</strong> Small bottleneck layers inserted into frozen model<br>
|
| 5691 |
+
• <strong>IA3:</strong> Learned rescaling vectors (even fewer params than LoRA)
|
| 5692 |
+
</div>
|
| 5693 |
+
`,
|
| 5694 |
+
math: `
|
| 5695 |
+
<h3>📐 Paper & Pain: LoRA Mathematics</h3>
|
| 5696 |
+
|
| 5697 |
+
<h4>Low-Rank Decomposition</h4>
|
| 5698 |
+
<div class="formula" style="font-size: 1.2rem; text-align: center; margin: 20px 0; background: rgba(0, 212, 255, 0.08); padding: 25px; border-radius: 8px;">
|
| 5699 |
+
<strong>W' = W + ΔW = W + B × A</strong><br>
|
| 5700 |
+
<small>where W ∈ ℝ^(d×d), B ∈ ℝ^(d×r), A ∈ ℝ^(r×d), r ≪ d</small>
|
| 5701 |
+
</div>
|
| 5702 |
+
|
| 5703 |
+
<div class="callout insight">
|
| 5704 |
+
<div class="callout-title">📝 Parameter Count Comparison</div>
|
| 5705 |
+
<strong>Full fine-tuning of one attention layer (d=4096):</strong><br>
|
| 5706 |
+
Parameters = d × d = 4096 × 4096 = <strong>16,777,216</strong><br><br>
|
| 5707 |
+
<strong>LoRA (r=16):</strong><br>
|
| 5708 |
+
Parameters = d × r + r × d = 4096 × 16 + 16 × 4096 = <strong>131,072</strong><br><br>
|
| 5709 |
+
<strong>Reduction: 128x fewer parameters!</strong> (0.78% of original)<br><br>
|
| 5710 |
+
For a full 7B model (all attention layers):<br>
|
| 5711 |
+
Full: ~7B params to train (~28GB VRAM)<br>
|
| 5712 |
+
LoRA (r=16): ~4.2M params to train (~1.5GB for adapters)
|
| 5713 |
+
</div>
|
| 5714 |
+
|
| 5715 |
+
<h4>Quantization Math</h4>
|
| 5716 |
+
<div class="formula">
|
| 5717 |
+
<strong>Linear Quantization (INT8):</strong><br>
|
| 5718 |
+
q = round(w / scale + zero_point)<br>
|
| 5719 |
+
w_approx = (q - zero_point) × scale<br><br>
|
| 5720 |
+
<strong>Example:</strong><br>
|
| 5721 |
+
Weight w = 0.73, scale = 0.01, zero_point = 128<br>
|
| 5722 |
+
q = round(0.73 / 0.01 + 128) = round(201) = 201<br>
|
| 5723 |
+
w_approx = (201 - 128) × 0.01 = 0.73 ✓
|
| 5724 |
+
</div>
|
| 5725 |
+
|
| 5726 |
+
<h4>Memory Savings</h4>
|
| 5727 |
+
<div class="formula">
|
| 5728 |
+
<strong>Model Size = Parameters × Bytes per Parameter</strong><br><br>
|
| 5729 |
+
7B model at fp32: 7B × 4 bytes = 28 GB<br>
|
| 5730 |
+
7B model at fp16: 7B × 2 bytes = 14 GB<br>
|
| 5731 |
+
7B model at INT8: 7B × 1 byte = 7 GB<br>
|
| 5732 |
+
7B model at INT4: 7B × 0.5 bytes = 3.5 GB<br><br>
|
| 5733 |
+
<strong>4-bit quantization = 8x memory reduction!</strong>
|
| 5734 |
+
</div>
|
| 5735 |
+
|
| 5736 |
+
<h4>DPO Loss Function</h4>
|
| 5737 |
+
<div class="formula">
|
| 5738 |
+
L_DPO = -E[log σ(β(log π_θ(y_w|x) - log π_ref(y_w|x) - log π_θ(y_l|x) + log π_ref(y_l|x)))]<br><br>
|
| 5739 |
+
Where:<br>
|
| 5740 |
+
• y_w = preferred response, y_l = rejected response<br>
|
| 5741 |
+
• π_θ = policy model, π_ref = reference model<br>
|
| 5742 |
+
• β = temperature controlling deviation from reference
|
| 5743 |
+
</div>
|
| 5744 |
+
`,
|
| 5745 |
+
applications: `
|
| 5746 |
+
<h3>Production Deployment</h3>
|
| 5747 |
+
<div class="info-box">
|
| 5748 |
+
<div class="box-title">🏠 Local LLM Deployment</div>
|
| 5749 |
+
<div class="box-content">
|
| 5750 |
+
<strong>Ollama:</strong> One-command local LLM deployment (ollama run llama3)<br>
|
| 5751 |
+
<strong>llama.cpp:</strong> CPU inference with GGUF quantized models<br>
|
| 5752 |
+
<strong>vLLM:</strong> High-throughput GPU serving with PagedAttention
|
| 5753 |
+
</div>
|
| 5754 |
+
</div>
|
| 5755 |
+
<div class="info-box">
|
| 5756 |
+
<div class="box-title">🎯 Domain-Specific Fine-Tuning</div>
|
| 5757 |
+
<div class="box-content">
|
| 5758 |
+
<strong>Medical:</strong> BioMistral, Med-PaLM (clinical notes, diagnoses)<br>
|
| 5759 |
+
<strong>Legal:</strong> SaulLM (contract analysis, case law)<br>
|
| 5760 |
+
<strong>Code:</strong> CodeLlama, StarCoder (code generation, completion)
|
| 5761 |
+
</div>
|
| 5762 |
+
</div>
|
| 5763 |
+
<div class="info-box">
|
| 5764 |
+
<div class="box-title">💰 Cost Optimization</div>
|
| 5765 |
+
<div class="box-content">
|
| 5766 |
+
<strong>Distillation:</strong> Train a small model to mimic a large one (GPT-4 → Phi-3)<br>
|
| 5767 |
+
<strong>Speculative Decoding:</strong> Small model drafts, large model verifies (2-3x speedup)<br>
|
| 5768 |
+
<strong>Mixture of Experts:</strong> Only activate 2 of 8 expert networks per token (Mixtral)
|
| 5769 |
+
</div>
|
| 5770 |
+
</div>
|
| 5771 |
+
<div class="info-box">
|
| 5772 |
+
<div class="box-title">🛠️ Key Tools</div>
|
| 5773 |
+
<div class="box-content">
|
| 5774 |
+
<strong>Hugging Face PEFT:</strong> Official LoRA/QLoRA library<br>
|
| 5775 |
+
<strong>Unsloth:</strong> 2x faster fine-tuning with 60% less memory<br>
|
| 5776 |
+
<strong>Axolotl:</strong> Config-driven fine-tuning (no code needed)<br>
|
| 5777 |
+
<strong>TRL:</strong> Transformer Reinforcement Learning (RLHF/DPO)
|
| 5778 |
+
</div>
|
| 5779 |
+
</div>
|
| 5780 |
+
`
|
| 5781 |
}
|
| 5782 |
};
|
| 5783 |
|
| 5784 |
+
|
| 5785 |
function createModuleHTML(module) {
|
| 5786 |
const content = MODULE_CONTENT[module.id] || {};
|
| 5787 |
|
|
|
|
| 6001 |
'vit': drawVisionTransformer,
|
| 6002 |
'gnn': drawGraphNetwork,
|
| 6003 |
'seq2seq': drawSeq2SeqAttention,
|
| 6004 |
+
'research-papers': drawDefaultVisualization,
|
| 6005 |
+
'vector-db': drawVectorSpace,
|
| 6006 |
+
'rag': drawRAGPipeline,
|
| 6007 |
+
'advanced-llm': drawLoRADiagram
|
| 6008 |
};
|
| 6009 |
|
| 6010 |
if (vizMap[moduleId]) {
|
|
|
|
| 7676 |
ctx.fillText('🛒 Pinterest/Amazon Recommendations', canvas.width / 2, 180);
|
| 7677 |
}
|
| 7678 |
|
| 7679 |
+
// ============ GenAI Visualizations ============
|
| 7680 |
+
|
| 7681 |
+
/**
 * Draw a 2-D "vector space" illustration on a canvas: two document clusters,
 * a query point, and dashed similarity links from the query to its nearest
 * cluster, plus a legend and an example cosine-similarity score.
 *
 * All point positions are hard-coded demo coordinates in canvas pixels.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D drawing context to render into.
 * @param {HTMLCanvasElement} canvas - Canvas element (only width/height are read).
 */
function drawVectorSpace(ctx, canvas) {
    const w = canvas.width, h = canvas.height;

    // Title (centered on the canvas).
    ctx.fillStyle = '#00c9a7';
    ctx.font = 'bold 16px Arial';
    ctx.textAlign = 'center';
    ctx.fillText('Vector Space — Similarity Search', w / 2, 30);

    // Axes with centered captions.
    ctx.strokeStyle = 'rgba(0, 201, 167, 0.3)';
    ctx.lineWidth = 1;
    ctx.beginPath(); ctx.moveTo(60, h - 40); ctx.lineTo(w - 20, h - 40); ctx.stroke();
    ctx.beginPath(); ctx.moveTo(60, h - 40); ctx.lineTo(60, 50); ctx.stroke();
    ctx.fillStyle = '#b0b7c3'; ctx.font = '11px Arial';
    ctx.fillText('Dimension 1', w / 2, h - 15);
    ctx.save(); ctx.translate(15, h / 2); ctx.rotate(-Math.PI / 2);
    ctx.fillText('Dimension 2', 0, 0); ctx.restore();

    // Demo embeddings: cluster A is the query's neighborhood, cluster B is far.
    const clusterA = [
        { x: 200, y: 120, label: 'doc1' }, { x: 230, y: 100, label: 'doc2' },
        { x: 180, y: 140, label: 'doc3' }, { x: 220, y: 150, label: 'doc4' }
    ];
    const clusterB = [
        { x: 420, y: 200, label: 'doc5' }, { x: 450, y: 220, label: 'doc6' },
        { x: 400, y: 240, label: 'doc7' }
    ];
    const query = { x: 190, y: 130, label: '🔍 Query' };

    // Soft halos behind each cluster.
    ctx.fillStyle = 'rgba(0, 136, 255, 0.08)';
    ctx.beginPath(); ctx.arc(210, 130, 60, 0, Math.PI * 2); ctx.fill();
    ctx.fillStyle = 'rgba(255, 107, 53, 0.08)';
    ctx.beginPath(); ctx.arc(423, 220, 55, 0, Math.PI * 2); ctx.fill();

    // Dashed similarity lines from the query to each nearest-cluster doc.
    ctx.strokeStyle = 'rgba(0, 255, 136, 0.5)';
    ctx.lineWidth = 2;
    ctx.setLineDash([5, 3]);
    clusterA.forEach(d => {
        ctx.beginPath(); ctx.moveTo(query.x, query.y);
        ctx.lineTo(d.x, d.y); ctx.stroke();
    });
    ctx.setLineDash([]);

    // FIX: the title left textAlign at 'center', so every label below was
    // centered on its anchor x — point labels straddled their dots and the
    // legend text overlapped its color swatch. Labels drawn beside a marker
    // must be left-aligned (matching drawRAGPipeline's convention).
    ctx.textAlign = 'left';

    // Document points with labels to their right.
    clusterA.forEach(d => {
        ctx.fillStyle = '#0088ff';
        ctx.beginPath(); ctx.arc(d.x, d.y, 8, 0, Math.PI * 2); ctx.fill();
        ctx.fillStyle = '#e4e6eb'; ctx.font = '10px Arial';
        ctx.fillText(d.label, d.x + 12, d.y + 4);
    });
    clusterB.forEach(d => {
        ctx.fillStyle = '#ff6b35';
        ctx.beginPath(); ctx.arc(d.x, d.y, 8, 0, Math.PI * 2); ctx.fill();
        ctx.fillStyle = '#e4e6eb'; ctx.font = '10px Arial';
        ctx.fillText(d.label, d.x + 12, d.y + 4);
    });

    // Query marker (slightly larger, labeled above).
    ctx.fillStyle = '#00ff88';
    ctx.beginPath(); ctx.arc(query.x, query.y, 10, 0, Math.PI * 2); ctx.fill();
    ctx.fillStyle = '#00ff88'; ctx.font = 'bold 12px Arial';
    ctx.fillText(query.label, query.x - 15, query.y - 18);

    // Legend: color swatch + left-aligned caption per series.
    ctx.font = '11px Arial'; let ly = h - 80;
    [['#0088ff', 'Cluster A (nearest)'], ['#ff6b35', 'Cluster B'], ['#00ff88', 'Query Vector']].forEach(([c, t]) => {
        ctx.fillStyle = c; ctx.fillRect(w - 160, ly, 10, 10);
        ctx.fillStyle = '#e4e6eb'; ctx.fillText(t, w - 145, ly + 9); ly += 18;
    });

    // Example cosine-similarity score, centered under the plot.
    ctx.textAlign = 'center';
    ctx.fillStyle = '#00c9a7'; ctx.font = 'bold 13px Arial';
    ctx.fillText('cos(query, doc1) = 0.97', w / 2, h - 55);
}
|
| 7758 |
+
|
| 7759 |
+
/**
 * Render the two-row RAG flow diagram: an offline indexing pipeline
 * (Docs → Chunk → Embed → Vector DB) and an online query pipeline
 * (Query → Embed → Retrieve → LLM → Answer), linked by a dashed line
 * from the Vector DB down to the Retrieve stage.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D drawing context to render into.
 * @param {HTMLCanvasElement} canvas - Canvas element (only width is read).
 */
function drawRAGPipeline(ctx, canvas) {
    const w = canvas.width;
    ctx.fillStyle = '#00c9a7';
    ctx.font = 'bold 16px Arial';
    ctx.textAlign = 'center';
    ctx.fillText('RAG Pipeline — Retrieval-Augmented Generation', w / 2, 30);

    // Stage boxes: indices 0-3 are the indexing row, 4-8 the query row.
    const boxes = [
        { x: 30, y: 70, w: 90, h: 50, label: '📄 Docs', color: '#0088ff' },
        { x: 145, y: 70, w: 90, h: 50, label: '✂️ Chunk', color: '#0088ff' },
        { x: 260, y: 70, w: 90, h: 50, label: '🧮 Embed', color: '#0088ff' },
        { x: 375, y: 70, w: 100, h: 50, label: '🧲 Vector DB', color: '#0088ff' },
        { x: 30, y: 180, w: 90, h: 50, label: '❓ Query', color: '#00ff88' },
        { x: 145, y: 180, w: 90, h: 50, label: '🧮 Embed', color: '#00ff88' },
        { x: 260, y: 180, w: 90, h: 50, label: '🔍 Retrieve', color: '#ff6b35' },
        { x: 375, y: 180, w: 100, h: 50, label: '🤖 LLM', color: '#ffaa00' },
        { x: 500, y: 180, w: 90, h: 50, label: '✅ Answer', color: '#00c9a7' }
    ];

    // Horizontal connector (line + triangular arrowhead) between two boxes.
    const connect = (src, dst, rgba) => {
        ctx.strokeStyle = rgba;
        ctx.lineWidth = 2;
        ctx.beginPath();
        ctx.moveTo(src.x + src.w, src.y + 25);
        ctx.lineTo(dst.x, dst.y + 25);
        ctx.stroke();
        ctx.fillStyle = rgba;
        ctx.beginPath();
        ctx.moveTo(dst.x, dst.y + 25);
        ctx.lineTo(dst.x - 8, dst.y + 20);
        ctx.lineTo(dst.x - 8, dst.y + 30);
        ctx.fill();
    };

    // Offline indexing row arrows.
    [0, 1, 2].forEach(i => connect(boxes[i], boxes[i + 1], 'rgba(0, 136, 255, 0.6)'));
    // Online query row arrows.
    [4, 5, 6, 7].forEach(i => connect(boxes[i], boxes[i + 1], 'rgba(0, 255, 136, 0.6)'));

    // Dashed vertical link: Vector DB feeds the Retrieve stage.
    ctx.strokeStyle = 'rgba(255, 107, 53, 0.6)'; ctx.setLineDash([4, 3]);
    ctx.beginPath();
    ctx.moveTo(boxes[3].x + 50, boxes[3].y + boxes[3].h);
    ctx.lineTo(boxes[6].x + 45, boxes[6].y);
    ctx.stroke(); ctx.setLineDash([]);

    // Boxes are drawn last so they sit on top of the connectors.
    for (const b of boxes) {
        ctx.fillStyle = 'rgba(0, 0, 0, 0.5)';
        ctx.fillRect(b.x, b.y, b.w, b.h);
        ctx.strokeStyle = b.color; ctx.lineWidth = 2;
        ctx.strokeRect(b.x, b.y, b.w, b.h);
        ctx.fillStyle = '#e4e6eb'; ctx.font = '11px Arial'; ctx.textAlign = 'center';
        ctx.fillText(b.label, b.x + b.w / 2, b.y + 30);
    }

    // Row headings, left-aligned above each pipeline.
    ctx.font = 'bold 12px Arial'; ctx.textAlign = 'left';
    ctx.fillStyle = '#0088ff'; ctx.fillText('Indexing (Offline)', 30, 60);
    ctx.fillStyle = '#00ff88'; ctx.fillText('Query (Online)', 30, 170);
}
|
| 7831 |
+
|
| 7832 |
+
/**
 * Render the LoRA decomposition diagram: frozen weight matrix W plus the
 * low-rank product B × A yielding the adapted matrix W', with a parameter
 * count comparison and a small quantization memory bar chart below.
 *
 * @param {CanvasRenderingContext2D} ctx - 2D drawing context to render into.
 * @param {HTMLCanvasElement} canvas - Canvas element (only width is read).
 */
function drawLoRADiagram(ctx, canvas) {
    const w = canvas.width;
    ctx.fillStyle = '#00c9a7';
    ctx.font = 'bold 16px Arial';
    ctx.textAlign = 'center';
    ctx.fillText('LoRA — Low-Rank Adaptation', w / 2, 30);

    // Draw one matrix rectangle with its letter; leaves ctx ready
    // (10px Arial, grey) for the caller's annotation labels.
    const matrix = (x, y, mw, mh, letter, fill, edge) => {
        ctx.fillStyle = fill;
        ctx.fillRect(x, y, mw, mh);
        ctx.strokeStyle = edge; ctx.lineWidth = 2;
        ctx.strokeRect(x, y, mw, mh);
        ctx.fillStyle = edge; ctx.font = 'bold 14px Arial';
        ctx.fillText(letter, x + mw / 2, y + mh / 2 + 5);
        ctx.font = '10px Arial'; ctx.fillStyle = '#b0b7c3';
    };

    // Frozen full-rank weight matrix W (d × d).
    const wX = 50, wY = 60, wW = 120, wH = 120;
    matrix(wX, wY, wW, wH, 'W', 'rgba(0, 136, 255, 0.15)', '#0088ff');
    ctx.fillText('d × d', wX + wW / 2, wY + wH / 2 + 22);
    ctx.fillText('(Frozen)', wX + wW / 2, wY - 8);

    ctx.fillStyle = '#e4e6eb'; ctx.font = 'bold 24px Arial';
    ctx.fillText('+', wX + wW + 30, wY + wH / 2 + 8);

    // Trainable down-projection B (d × r): tall and narrow.
    const bX = 230, bY = 60, bW = 30, bH = 120;
    matrix(bX, bY, bW, bH, 'B', 'rgba(0, 255, 136, 0.2)', '#00ff88');
    ctx.fillText('d×r', bX + bW / 2, bY - 8);

    ctx.fillStyle = '#e4e6eb'; ctx.font = 'bold 18px Arial';
    ctx.fillText('×', bX + bW + 18, bY + bH / 2 + 5);

    // Trainable up-projection A (r × d): short and wide.
    const aX = 290, aY = 100, aW = 120, aH = 30;
    matrix(aX, aY, aW, aH, 'A', 'rgba(255, 107, 53, 0.2)', '#ff6b35');
    ctx.fillText('r×d', aX + aW / 2, aY - 8);

    ctx.fillStyle = '#e4e6eb'; ctx.font = 'bold 24px Arial';
    ctx.fillText('=', aX + aW + 25, wY + wH / 2 + 8);

    // Adapted result W' = W + B × A.
    const rX = 460, rY = 60, rW = 120, rH = 120;
    matrix(rX, rY, rW, rH, "W'", 'rgba(0, 201, 167, 0.15)', '#00c9a7');
    ctx.fillText('d × d', rX + rW / 2, rY + rH / 2 + 22);
    ctx.fillText('(Adapted)', rX + rW / 2, rY - 8);

    // Parameter-count comparison (d=4096, r=16).
    ctx.font = '12px Arial'; ctx.textAlign = 'left';
    ctx.fillStyle = '#0088ff'; ctx.fillText('Full: 16,777,216 params', 50, 220);
    ctx.fillStyle = '#00ff88'; ctx.fillText('LoRA (r=16): 131,072 params', 50, 240);
    ctx.fillStyle = '#00c9a7'; ctx.fillText('Savings: 128x reduction (0.78%)', 50, 260);

    // Quantization memory bar chart for a 7B-parameter model.
    ctx.fillStyle = '#e4e6eb'; ctx.font = 'bold 12px Arial'; ctx.textAlign = 'center';
    ctx.fillText('Memory: 7B Model', 480, 210);
    const bars = [
        { label: 'fp32', val: 28, color: '#ff6b35' },
        { label: 'fp16', val: 14, color: '#ffaa00' },
        { label: 'INT8', val: 7, color: '#0088ff' },
        { label: 'INT4', val: 3.5, color: '#00ff88' }
    ];
    bars.forEach((b, i) => {
        const bw = b.val * 4.5; // 4.5 px per GB
        ctx.fillStyle = b.color;
        ctx.fillRect(430, 220 + i * 22, bw, 16);
        ctx.fillStyle = '#e4e6eb'; ctx.font = '10px Arial'; ctx.textAlign = 'left';
        ctx.fillText(`${b.label}: ${b.val}GB`, 430 + bw + 5, 233 + i * 22);
    });
}
|
| 7919 |
+
|
| 7920 |
initDashboard();
|
| 7921 |
</script>
|
| 7922 |
</body>
|
index.html
CHANGED
|
@@ -488,7 +488,8 @@
|
|
| 488 |
<div class="module-card-body">
|
| 489 |
<h2 class="module-card-title">Deep Learning Masterclass</h2>
|
| 490 |
<p class="module-card-description">
|
| 491 |
-
Complete Zero to Hero journey. CNNs, RNNs,
|
|
|
|
| 492 |
Featuring rigorous "Paper & Pain" math and interactive visualizations.
|
| 493 |
</p>
|
| 494 |
</div>
|
|
|
|
| 488 |
<div class="module-card-body">
|
| 489 |
<h2 class="module-card-title">Deep Learning Masterclass</h2>
|
| 490 |
<p class="module-card-description">
|
| 491 |
+
Complete Zero to Hero journey. CNNs, RNNs, Transformers, GANs, Diffusion Models,
|
| 492 |
+
plus GenAI essentials: RAG, Vector Databases, LoRA & Quantization.
|
| 493 |
Featuring rigorous "Paper & Pain" math and interactive visualizations.
|
| 494 |
</p>
|
| 495 |
</div>
|
shared/js/search.js
CHANGED
|
@@ -36,7 +36,10 @@
|
|
| 36 |
{ title: 'GANs - Generative Adversarial Networks', section: 'gans' },
|
| 37 |
{ title: 'Diffusion Models', section: 'diffusion' },
|
| 38 |
{ title: 'Regularization & Dropout', section: 'regularization' },
|
| 39 |
-
{ title: 'Batch Normalization', section: 'batchnorm' }
|
|
|
|
|
|
|
|
|
|
| 40 |
]
|
| 41 |
},
|
| 42 |
{
|
|
|
|
| 36 |
{ title: 'GANs - Generative Adversarial Networks', section: 'gans' },
|
| 37 |
{ title: 'Diffusion Models', section: 'diffusion' },
|
| 38 |
{ title: 'Regularization & Dropout', section: 'regularization' },
|
| 39 |
+
{ title: 'Batch Normalization', section: 'batchnorm' },
|
| 40 |
+
{ title: 'Vector Databases', section: 'vector-db' },
|
| 41 |
+
{ title: 'RAG Pipelines', section: 'rag' },
|
| 42 |
+
{ title: 'Fine-Tuning & Quantization', section: 'advanced-llm' }
|
| 43 |
]
|
| 44 |
},
|
| 45 |
{
|