diff --git a/scripts/add_colab_badges.py b/scripts/add_colab_badges.py
index 155c6900270809ef631950009f4fa102ba7ca163..e6f0c1a2e74c1098acab0be0c0a86d2b209ccdf4 100644
--- a/scripts/add_colab_badges.py
+++ b/scripts/add_colab_badges.py
@@ -1,27 +1,29 @@
 #!/usr/bin/env python3
-"""Add 'Open in Colab' badges to all template notebooks."""
+"""Add 'Open in Colab' badges to all template and solution notebooks."""
 
 import json
 from pathlib import Path
 
 REPO = "duoan/TorchCode"
 BRANCH = "master"
-TEMPLATES_DIR = Path(__file__).resolve().parent.parent / "templates"
+ROOT = Path(__file__).resolve().parent.parent  # two levels up from this script (scripts/ -> repo root)
+TEMPLATES_DIR = ROOT / "templates"  # notebooks globbed by the first loop in main()
+SOLUTIONS_DIR = ROOT / "solutions"  # notebooks globbed by the second loop in main()
 BADGE_IMG = "https://colab.research.google.com/assets/colab-badge.svg"
 
 
-def colab_url(filename: str) -> str:
+def colab_url(filename: str, folder: str) -> str:  # folder: repo subdirectory that holds the notebook
     return (
         f"https://colab.research.google.com/github/{REPO}"
-        f"/blob/{BRANCH}/templates/{filename}"
+        f"/blob/{BRANCH}/{folder}/{filename}"  # folder replaces the previously hard-coded "templates"
     )
 
 
-def badge_markdown(filename: str) -> str:
-    return f"[![Open In Colab]({BADGE_IMG})]({colab_url(filename)})"
+def badge_markdown(filename: str, folder: str) -> str:  # folder is passed straight through to colab_url
+    return f"[![Open In Colab]({BADGE_IMG})]({colab_url(filename, folder)})"  # Markdown image wrapped in a link
 
 
-def process_notebook(path: Path) -> bool:
+def process_notebook(path: Path, folder: str) -> bool:
     with open(path, "r", encoding="utf-8") as f:
         nb = json.load(f)
 
@@ -34,7 +36,7 @@ def process_notebook(path: Path) -> bool:
     if "colab-badge.svg" in flat:
         return False
 
-    badge = badge_markdown(path.name)
+    badge = badge_markdown(path.name, folder)
     cells[0]["source"] = [badge + "\n\n"] + (
         source_lines if isinstance(source_lines, list) else [source_lines]
     )
@@ -49,11 +51,14 @@ def process_notebook(path: Path) -> bool:
 def main() -> None:
+    """Add the Colab badge to every notebook in templates/ and solutions/ and print a summary."""
     updated = 0
-    for nb_path in sorted(TEMPLATES_DIR.glob("*.ipynb")):
-        if process_notebook(nb_path):
-            print(f"  ✅ {nb_path.name}")
-            updated += 1
-        else:
-            print(f"  ⏭️  {nb_path.name} (already has badge or skipped)")
+    # One loop for both folders; `folder` is also the path segment used in the Colab URL.
+    for folder, directory in (("templates", TEMPLATES_DIR), ("solutions", SOLUTIONS_DIR)):
+        for nb_path in sorted(directory.glob("*.ipynb")):
+            if process_notebook(nb_path, folder):
+                print(f"  ✅ {folder}/{nb_path.name}")
+                updated += 1
+            else:
+                print(f"  ⏭️  {folder}/{nb_path.name} (already has badge or skipped)")
     print(f"\nDone — updated {updated} notebooks.")
 
 
diff --git a/scripts/add_colab_torch_judge_install.py b/scripts/add_colab_torch_judge_install.py
new file mode 100644
index 0000000000000000000000000000000000000000..936e15e3b9d1253823b7464e101e9183f0d87d29
--- /dev/null
+++ b/scripts/add_colab_torch_judge_install.py
@@ -0,0 +1,79 @@
+#!/usr/bin/env python3
+"""Add Colab-only pip install of torch-judge to all notebooks that use torch_judge."""
+
+import json
+from pathlib import Path
+
+INSTALL_CELL_SOURCE = [  # "source" lines of the code cell that process_notebook inserts
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n",
+]
+
+MARKER = "get_ipython().run_line_magic('pip', 'install"  # substring already_has_install searches for
+
+
+def has_torch_judge(nb: dict) -> bool:
+    """Return True if any cell's source text contains ``torch_judge``."""
+    def _text(cell: dict) -> str:
+        raw = cell.get("source", [])
+        return "".join(raw) if isinstance(raw, list) else str(raw)
+
+    return any("torch_judge" in _text(c) for c in nb.get("cells", []))
+
+
+def already_has_install(nb: dict) -> bool:
+    """Return True if a single cell contains both MARKER and ``torch-judge``."""
+    def _matches(text: str) -> bool:
+        return MARKER in text and "torch-judge" in text
+
+    sources = (c.get("source", []) for c in nb.get("cells", []))
+    return any(_matches("".join(s) if isinstance(s, list) else str(s)) for s in sources)
+
+
+def process_notebook(path: Path) -> bool:
+    """Insert the Colab install cell after the first cell of the notebook at *path*.
+
+    Returns True if the notebook was rewritten, False if it was skipped (no
+    ``torch_judge`` usage, or an install cell already exists).
+    """
+    import uuid  # function-local: only this block needs it
+
+    with open(path, "r", encoding="utf-8") as f:
+        nb = json.load(f)
+    if not has_torch_judge(nb) or already_has_install(nb):
+        return False
+
+    install_cell = {
+        "cell_type": "code",
+        "metadata": {},
+        "source": list(INSTALL_CELL_SOURCE),  # copy so the constant is never aliased
+        "outputs": [],
+        "execution_count": None,
+    }
+    # nbformat 4.5+ requires an ``id`` on every cell; earlier minors do not define it.
+    if (nb.get("nbformat", 0), nb.get("nbformat_minor", 0)) >= (4, 5):
+        install_cell["id"] = uuid.uuid4().hex[:8]
+    nb["cells"].insert(1, install_cell)  # index 1: right after the first (usually title) cell
+    with open(path, "w", encoding="utf-8") as f:
+        json.dump(nb, f, ensure_ascii=False, indent=1)
+        f.write("\n")
+    return True
+
+
+def main() -> None:
+    """Patch all template/solution notebooks, then print how many changed."""
+    repo_root = Path(__file__).resolve().parent.parent
+    count = 0
+    for glob_expr in ("templates/*.ipynb", "solutions/*.ipynb"):
+        for nb_file in filter(process_notebook, sorted(repo_root.glob(glob_expr))):
+            print(f"  + {nb_file.relative_to(repo_root)}")
+            count += 1
+    print(f"Updated {count} notebooks.")
+
+
+if __name__ == "__main__":  # run only when executed as a script, not on import
+    main()
diff --git a/solutions/01_relu_solution.ipynb b/solutions/01_relu_solution.ipynb
index 4e5feb2560a927d647ab53559b655e896d7406ee..085e3aba5a7659631e786ca78b2a0c18e9834eee 100644
--- a/solutions/01_relu_solution.ipynb
+++ b/solutions/01_relu_solution.ipynb
@@ -1,73 +1,89 @@
 {
- "nbformat": 4,
- "nbformat_minor": 5,
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python",
-   "version": "3.11.0"
-  }
- },
  "cells": [
   {
    "cell_type": "markdown",
+   "id": "0556419b",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb)\n",
+    "\n",
     "# 🟢 Solution: Implement ReLU\n",
     "\n",
     "Reference solution for the ReLU activation function.\n",
     "\n",
     "$$\\text{ReLU}(x) = \\max(0, x)$$"
-   ],
-   "outputs": []
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
-    "import torch"
-   ],
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
    "outputs": [],
-   "execution_count": null
+   "source": [
+    "import torch"
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "# ✅ SOLUTION\n",
     "\n",
     "def relu(x: torch.Tensor) -> torch.Tensor:\n",
     "    return x * (x > 0).float()"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "# Verify\n",
     "x = torch.tensor([-2., -1., 0., 1., 2.])\n",
     "print(\"Input: \", x)\n",
     "print(\"Output:\", relu(x))"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
    "source": [
     "# Run judge\n",
     "from torch_judge import check\n",
     "check(\"relu\")"
-   ],
-   "outputs": [],
-   "execution_count": null
+   ]
   }
- ]
-}
\ No newline at end of file
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/solutions/02_softmax_solution.ipynb b/solutions/02_softmax_solution.ipynb
index 6750f84730d771fc759b36d8813b2e541ff8c5ac..902106e9906a06d918f0b223f6e60899ffdd1fe6 100644
--- a/solutions/02_softmax_solution.ipynb
+++ b/solutions/02_softmax_solution.ipynb
@@ -17,6 +17,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb)\n\n",
     "# 🟢 Solution: Implement Softmax\n",
     "\n",
     "Reference solution for the numerically-stable Softmax function.\n",
@@ -25,6 +26,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -73,4 +88,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/03_linear_solution.ipynb b/solutions/03_linear_solution.ipynb
index f13c199475db957b977753c42966d66d904e6d5a..4f25a88a0b9a7dd991afafcd2268950869c8e349 100644
--- a/solutions/03_linear_solution.ipynb
+++ b/solutions/03_linear_solution.ipynb
@@ -17,12 +17,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb)\n\n",
     "# 🟡 Solution: Simple Linear Layer\n",
     "\n",
     "Reference solution for a fully-connected linear layer: **y = xW^T + b**"
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -77,4 +92,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/04_layernorm_solution.ipynb b/solutions/04_layernorm_solution.ipynb
index dc114676bea4b201ee95ccd9ec7a441cb7dc6a96..59ac87c2a46f9716bd2fdb07bb58a5ec438d1c41 100644
--- a/solutions/04_layernorm_solution.ipynb
+++ b/solutions/04_layernorm_solution.ipynb
@@ -17,6 +17,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb)\n\n",
     "# 🟡 Solution: Implement LayerNorm\n",
     "\n",
     "Reference solution for Layer Normalization.\n",
@@ -25,6 +26,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -76,4 +91,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/05_attention_solution.ipynb b/solutions/05_attention_solution.ipynb
index 2f0fd847b363538101ae1b8d5f4ebeb87db425ad..e82f45fc2d37d6ed2dedeba19dfc76729f191a08 100644
--- a/solutions/05_attention_solution.ipynb
+++ b/solutions/05_attention_solution.ipynb
@@ -5,6 +5,7 @@
    "id": "5f63d076",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb)\n\n",
     "# 🔴 Solution: Softmax Attention\n",
     "\n",
     "Reference solution for the core Transformer attention mechanism.\n",
@@ -12,6 +13,21 @@
     "$$\\text{Attention}(Q, K, V) = \\text{softmax}\\!\\left(\\frac{QK^T}{\\sqrt{d_k}}\\right)V$$"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "ce663fb0",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/solutions/06_multihead_attention_solution.ipynb b/solutions/06_multihead_attention_solution.ipynb
index ff1e3f2ce98c2809e7f3045c9dbe34de7998c59a..7ed4ad40dfcc66e21072dc8c4f7681c0df365c89 100644
--- a/solutions/06_multihead_attention_solution.ipynb
+++ b/solutions/06_multihead_attention_solution.ipynb
@@ -1,105 +1,120 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "# 🔴 Solution: Multi-Head Attention\n",
-        "\n",
-        "Reference solution for the Multi-Head Attention mechanism.\n",
-        "\n",
-        "$$\\text{MultiHead}(Q, K, V) = \\text{Concat}(\\text{head}_1, \\dots, \\text{head}_h) W^O$$"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn\n",
-        "import math"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "46b73737",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✅ SOLUTION\n",
-        "\n",
-        "class MultiHeadAttention:\n",
-        "    def __init__(self, d_model: int, num_heads: int):\n",
-        "        self.num_heads = num_heads\n",
-        "        self.d_k = d_model // num_heads\n",
-        "\n",
-        "        self.W_q = nn.Linear(d_model, d_model)\n",
-        "        self.W_k = nn.Linear(d_model, d_model)\n",
-        "        self.W_v = nn.Linear(d_model, d_model)\n",
-        "        self.W_o = nn.Linear(d_model, d_model)\n",
-        "\n",
-        "    def forward(self, Q, K, V):\n",
-        "        B, S_q, _ = Q.shape\n",
-        "        S_k = K.shape[1]\n",
-        "\n",
-        "        q = self.W_q(Q).view(B, S_q, self.num_heads, self.d_k).transpose(1, 2)\n",
-        "        k = self.W_k(K).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
-        "        v = self.W_v(V).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
-        "\n",
-        "        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)\n",
-        "        weights = torch.softmax(scores, dim=-1)\n",
-        "        attn = torch.matmul(weights, v)\n",
-        "\n",
-        "        out = attn.transpose(1, 2).contiguous().view(B, S_q, -1)\n",
-        "        return self.W_o(out)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# Verify\n",
-        "torch.manual_seed(0)\n",
-        "mha = MultiHeadAttention(d_model=32, num_heads=4)\n",
-        "x = torch.randn(2, 6, 32)\n",
-        "out = mha.forward(x, x, x)\n",
-        "print(\"Self-attn shape:\", out.shape)\n",
-        "\n",
-        "Q = torch.randn(1, 3, 32)\n",
-        "K = torch.randn(1, 7, 32)\n",
-        "V = torch.randn(1, 7, 32)\n",
-        "out2 = mha.forward(Q, K, V)\n",
-        "print(\"Cross-attn shape:\", out2.shape)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# Run judge\n",
-        "from torch_judge import check\n",
-        "check(\"mha\")"
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python",
-      "version": "3.11.0"
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb)\n\n",
+    "# 🔴 Solution: Multi-Head Attention\n",
+    "\n",
+    "Reference solution for the Multi-Head Attention mechanism.\n",
+    "\n",
+    "$$\\text{MultiHead}(Q, K, V) = \\text{Concat}(\\text{head}_1, \\dots, \\text{head}_h) W^O$$"
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 5
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn\n",
+    "import math"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "46b73737",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✅ SOLUTION\n",
+    "\n",
+    "class MultiHeadAttention:\n",
+    "    def __init__(self, d_model: int, num_heads: int):\n",
+    "        self.num_heads = num_heads\n",
+    "        self.d_k = d_model // num_heads\n",
+    "\n",
+    "        self.W_q = nn.Linear(d_model, d_model)\n",
+    "        self.W_k = nn.Linear(d_model, d_model)\n",
+    "        self.W_v = nn.Linear(d_model, d_model)\n",
+    "        self.W_o = nn.Linear(d_model, d_model)\n",
+    "\n",
+    "    def forward(self, Q, K, V):\n",
+    "        B, S_q, _ = Q.shape\n",
+    "        S_k = K.shape[1]\n",
+    "\n",
+    "        q = self.W_q(Q).view(B, S_q, self.num_heads, self.d_k).transpose(1, 2)\n",
+    "        k = self.W_k(K).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
+    "        v = self.W_v(V).view(B, S_k, self.num_heads, self.d_k).transpose(1, 2)\n",
+    "\n",
+    "        scores = torch.matmul(q, k.transpose(-2, -1)) / math.sqrt(self.d_k)\n",
+    "        weights = torch.softmax(scores, dim=-1)\n",
+    "        attn = torch.matmul(weights, v)\n",
+    "\n",
+    "        out = attn.transpose(1, 2).contiguous().view(B, S_q, -1)\n",
+    "        return self.W_o(out)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Verify\n",
+    "torch.manual_seed(0)\n",
+    "mha = MultiHeadAttention(d_model=32, num_heads=4)\n",
+    "x = torch.randn(2, 6, 32)\n",
+    "out = mha.forward(x, x, x)\n",
+    "print(\"Self-attn shape:\", out.shape)\n",
+    "\n",
+    "Q = torch.randn(1, 3, 32)\n",
+    "K = torch.randn(1, 7, 32)\n",
+    "V = torch.randn(1, 7, 32)\n",
+    "out2 = mha.forward(Q, K, V)\n",
+    "print(\"Cross-attn shape:\", out2.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Run judge\n",
+    "from torch_judge import check\n",
+    "check(\"mha\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }
diff --git a/solutions/07_batchnorm_solution.ipynb b/solutions/07_batchnorm_solution.ipynb
index 7ddca6a71cc43ce27394243ae598caf69654c79e..a108cc1171e3fa62f80dff326d4860528d6c7565 100644
--- a/solutions/07_batchnorm_solution.ipynb
+++ b/solutions/07_batchnorm_solution.ipynb
@@ -1,117 +1,132 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "id": "ffd42526",
-      "metadata": {},
-      "source": [
-        "# 🟡 Solution: Implement BatchNorm\n",
-        "\n",
-        "Reference solution for Batch Normalization with both **training** and **inference** behavior, including running mean/variance updates."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import torch"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "70488b9f",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✅ SOLUTION\n",
-        "\n",
-        "import torch\n",
-        "\n",
-        "def my_batch_norm(\n",
-        "    x,\n",
-        "    gamma,\n",
-        "    beta,\n",
-        "    running_mean,\n",
-        "    running_var,\n",
-        "    eps=1e-5,\n",
-        "    momentum=0.1,\n",
-        "    training=True,\n",
-        "):\n",
-        "    \"\"\"BatchNorm with train/eval behavior and running stats.\n",
-        "\n",
-        "    - Training: use batch stats, update running_mean / running_var in-place.\n",
-        "    - Inference: use running_mean / running_var as-is.\n",
-        "    \"\"\"\n",
-        "    if training:\n",
-        "        batch_mean = x.mean(dim=0)\n",
-        "        batch_var = x.var(dim=0, unbiased=False)\n",
-        "\n",
-        "        # Update running statistics in-place. Detach to avoid tracking gradients.\n",
-        "        running_mean.mul_(1 - momentum).add_(momentum * batch_mean.detach())\n",
-        "        running_var.mul_(1 - momentum).add_(momentum * batch_var.detach())\n",
-        "\n",
-        "        mean = batch_mean\n",
-        "        var = batch_var\n",
-        "    else:\n",
-        "        mean = running_mean\n",
-        "        var = running_var\n",
-        "\n",
-        "    x_norm = (x - mean) / torch.sqrt(var + eps)\n",
-        "    return gamma * x_norm + beta"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "dbd7bb4e",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# Verify\n",
-        "x = torch.randn(8, 4)\n",
-        "gamma = torch.ones(4)\n",
-        "beta = torch.zeros(4)\n",
-        "\n",
-        "running_mean = torch.zeros(4)\n",
-        "running_var = torch.ones(4)\n",
-        "\n",
-        "# Training behavior: normalize with batch stats and update running stats\n",
-        "out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
-        "print(\"[Train] Column means:\", out_train.mean(dim=0))\n",
-        "print(\"[Train] Column stds: \", out_train.std(dim=0))\n",
-        "print(\"Updated running_mean:\", running_mean)\n",
-        "print(\"Updated running_var:\", running_var)\n",
-        "\n",
-        "# Inference behavior: use running_mean / running_var only\n",
-        "out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
-        "print(\"[Eval] Column means (using running stats):\", out_eval.mean(dim=0))"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "from torch_judge import check\n",
-        "check('batchnorm')"
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python",
-      "version": "3.11.0"
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "ffd42526",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb)\n\n",
+    "# 🟡 Solution: Implement BatchNorm\n",
+    "\n",
+    "Reference solution for Batch Normalization with both **training** and **inference** behavior, including running mean/variance updates."
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 5
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "70488b9f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✅ SOLUTION\n",
+    "\n",
+    "import torch\n",
+    "\n",
+    "def my_batch_norm(\n",
+    "    x,\n",
+    "    gamma,\n",
+    "    beta,\n",
+    "    running_mean,\n",
+    "    running_var,\n",
+    "    eps=1e-5,\n",
+    "    momentum=0.1,\n",
+    "    training=True,\n",
+    "):\n",
+    "    \"\"\"BatchNorm with train/eval behavior and running stats.\n",
+    "\n",
+    "    - Training: use batch stats, update running_mean / running_var in-place.\n",
+    "    - Inference: use running_mean / running_var as-is.\n",
+    "    \"\"\"\n",
+    "    if training:\n",
+    "        batch_mean = x.mean(dim=0)\n",
+    "        batch_var = x.var(dim=0, unbiased=False)\n",
+    "\n",
+    "        # Update running statistics in-place. Detach to avoid tracking gradients.\n",
+    "        running_mean.mul_(1 - momentum).add_(momentum * batch_mean.detach())\n",
+    "        running_var.mul_(1 - momentum).add_(momentum * batch_var.detach())\n",
+    "\n",
+    "        mean = batch_mean\n",
+    "        var = batch_var\n",
+    "    else:\n",
+    "        mean = running_mean\n",
+    "        var = running_var\n",
+    "\n",
+    "    x_norm = (x - mean) / torch.sqrt(var + eps)\n",
+    "    return gamma * x_norm + beta"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "dbd7bb4e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Verify\n",
+    "x = torch.randn(8, 4)\n",
+    "gamma = torch.ones(4)\n",
+    "beta = torch.zeros(4)\n",
+    "\n",
+    "running_mean = torch.zeros(4)\n",
+    "running_var = torch.ones(4)\n",
+    "\n",
+    "# Training behavior: normalize with batch stats and update running stats\n",
+    "out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
+    "print(\"[Train] Column means:\", out_train.mean(dim=0))\n",
+    "print(\"[Train] Column stds: \", out_train.std(dim=0))\n",
+    "print(\"Updated running_mean:\", running_mean)\n",
+    "print(\"Updated running_var:\", running_var)\n",
+    "\n",
+    "# Inference behavior: use running_mean / running_var only\n",
+    "out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
+    "print(\"[Eval] Column means (using running stats):\", out_eval.mean(dim=0))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from torch_judge import check\n",
+    "check('batchnorm')"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }
diff --git a/solutions/08_rmsnorm_solution.ipynb b/solutions/08_rmsnorm_solution.ipynb
index f5b38d33907c06d997b23ac300f40b215dde266a..0d580568cef9a2e45775e50e94dbbcd72b265675 100644
--- a/solutions/08_rmsnorm_solution.ipynb
+++ b/solutions/08_rmsnorm_solution.ipynb
@@ -17,12 +17,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb)\n\n",
     "# 🟡 Solution: Implement RMSNorm\n",
     "\n",
     "Reference solution for Root Mean Square Normalization."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -67,4 +82,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/09_causal_attention_solution.ipynb b/solutions/09_causal_attention_solution.ipynb
index f09ae03534b7b908941efd8705dae5fe78e50787..995bcfb9ab314ce37ef765119c56e08367c258cf 100644
--- a/solutions/09_causal_attention_solution.ipynb
+++ b/solutions/09_causal_attention_solution.ipynb
@@ -17,12 +17,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb)\n\n",
     "# 🔴 Solution: Causal Self-Attention\n",
     "\n",
     "Reference solution — softmax attention with an upper-triangular mask."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -77,4 +92,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/10_gqa_solution.ipynb b/solutions/10_gqa_solution.ipynb
index 6daf9c0542256e7d47aa443ffc7041fdb46e3ffe..fe0c5457401d55967bdd803179217889c52ab9df 100644
--- a/solutions/10_gqa_solution.ipynb
+++ b/solutions/10_gqa_solution.ipynb
@@ -17,12 +17,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb)\n\n",
     "# 🔴 Solution: Grouped Query Attention\n",
     "\n",
     "Reference solution for GQA — MHA with shared KV heads."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -88,4 +103,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/11_sliding_window_solution.ipynb b/solutions/11_sliding_window_solution.ipynb
index 16adab4e60908d416ff033142382a1301e2ed2bf..54b77aaab9e265872a315e194141c5f494bfb88f 100644
--- a/solutions/11_sliding_window_solution.ipynb
+++ b/solutions/11_sliding_window_solution.ipynb
@@ -17,12 +17,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb)\n\n",
     "# 🔴 Solution: Sliding Window Attention\n",
     "\n",
     "Reference solution — softmax attention with a band mask."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -73,4 +88,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/12_linear_attention_solution.ipynb b/solutions/12_linear_attention_solution.ipynb
index 4eb3df41e45fb79b375b91b612237545379b2562..a16ec271cf082f1eb131a5b9a11caad262be6fdd 100644
--- a/solutions/12_linear_attention_solution.ipynb
+++ b/solutions/12_linear_attention_solution.ipynb
@@ -17,12 +17,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb)\n\n",
     "# 🔴 Solution: Linear Self-Attention\n",
     "\n",
     "Reference solution — kernel-based attention with elu+1 feature map."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -72,4 +87,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/13_gpt2_block_solution.ipynb b/solutions/13_gpt2_block_solution.ipynb
index eedfc3adf161df2579fc0937423de4e63fa8f1e0..ea77036e3d4f1d49c2a7197f610f511a40200c6a 100644
--- a/solutions/13_gpt2_block_solution.ipynb
+++ b/solutions/13_gpt2_block_solution.ipynb
@@ -17,12 +17,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb)\n\n",
     "# 🔴 Solution: GPT-2 Transformer Block\n",
     "\n",
     "Reference solution — pre-norm, causal self-attention, 4x MLP with GELU."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -102,4 +117,4 @@
    "execution_count": null
   }
  ]
-}
\ No newline at end of file
+}
diff --git a/solutions/14_kv_cache_solution.ipynb b/solutions/14_kv_cache_solution.ipynb
index 2ab655582cdd230d4dbd6de2a8d98ef129368400..650149df1fd0a83c51e6f29f5fa3e86bf78e5b5e 100644
--- a/solutions/14_kv_cache_solution.ipynb
+++ b/solutions/14_kv_cache_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# \ud83d\udd34 Solution: KV Cache Attention\n",
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb)\n\n",
+    "# 🔴 Solution: KV Cache Attention\n",
     "\n",
-    "Reference solution \u2014 multi-head attention with KV caching for autoregressive inference."
+    "Reference solution — multi-head attention with KV caching for autoregressive inference."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -26,7 +41,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# \u2705 SOLUTION\n",
+    "# ✅ SOLUTION\n",
     "\n",
     "class KVCacheAttention(nn.Module):\n",
     "    def __init__(self, d_model, num_heads):\n",
diff --git a/solutions/15_mlp_solution.ipynb b/solutions/15_mlp_solution.ipynb
index 4d9ac05ffd7de5aeeeb1718a063a2f0e55819ae5..ae3759b43aaa819c4a2084b7a6734eea1e19270c 100644
--- a/solutions/15_mlp_solution.ipynb
+++ b/solutions/15_mlp_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "# \ud83d\udfe0 Solution: SwiGLU MLP\n",
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb)\n\n",
+    "# 🟠 Solution: SwiGLU MLP\n",
     "\n",
-    "Reference solution \u2014 gated feed-forward network used in LLaMA, Mistral, and PaLM."
+    "Reference solution — gated feed-forward network used in LLaMA, Mistral, and PaLM."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -26,7 +41,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# \u2705 SOLUTION\n",
+    "# ✅ SOLUTION\n",
     "\n",
     "class SwiGLUMLP(nn.Module):\n",
     "    def __init__(self, d_model, d_ff):\n",
diff --git a/solutions/16_cross_entropy_solution.ipynb b/solutions/16_cross_entropy_solution.ipynb
index bca9c2712b75a564b9798424640af972f994bd52..a8945b0b9274f474f5d68a97909714f1fa88326f 100644
--- a/solutions/16_cross_entropy_solution.ipynb
+++ b/solutions/16_cross_entropy_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb)\n\n",
     "# Solution: Cross-Entropy Loss\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -69,4 +84,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/17_dropout_solution.ipynb b/solutions/17_dropout_solution.ipynb
index 4812d3a80862cbbcd2eb2857bb1b0adecfd0929c..1ce4b5d5bfe82ff7487132305912c81e465eef6f 100644
--- a/solutions/17_dropout_solution.ipynb
+++ b/solutions/17_dropout_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb)\n\n",
     "# Solution: Implement Dropout\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -79,4 +94,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/18_embedding_solution.ipynb b/solutions/18_embedding_solution.ipynb
index 17926659b5feeee27bdf18c4c33aa802fc550fce..d1330e0c0d4ae3a2f7d5412a3828bd28e63e612c 100644
--- a/solutions/18_embedding_solution.ipynb
+++ b/solutions/18_embedding_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb)\n\n",
     "# Solution: Embedding Layer\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -74,4 +89,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/19_gelu_solution.ipynb b/solutions/19_gelu_solution.ipynb
index 3e3308bcaaf7226234a71019a6f1c2fb97d22102..368e713ccca32c7bac97b30c06141fdc2f18bbaf 100644
--- a/solutions/19_gelu_solution.ipynb
+++ b/solutions/19_gelu_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb)\n\n",
     "# Solution: GELU Activation\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -68,4 +83,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/20_weight_init_solution.ipynb b/solutions/20_weight_init_solution.ipynb
index 6a9cc4c662896bb3c8679ba4c61ff3072cac6c16..c8a63dfe7f8dd3ae78ae2b67936f3fb9c63e8459 100644
--- a/solutions/20_weight_init_solution.ipynb
+++ b/solutions/20_weight_init_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb)\n\n",
     "# Solution: Kaiming Initialization\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -74,4 +89,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/21_gradient_clipping_solution.ipynb b/solutions/21_gradient_clipping_solution.ipynb
index 2a084c0c0c1c82a2e36fa190bc59776402d43acf..0bfce2476216304d9501413d68cf44b22f781b9d 100644
--- a/solutions/21_gradient_clipping_solution.ipynb
+++ b/solutions/21_gradient_clipping_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb)\n\n",
     "# Solution: Gradient Norm Clipping\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -76,4 +91,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/22_conv2d_solution.ipynb b/solutions/22_conv2d_solution.ipynb
index 3b8c2cc181f535c2f641c96df4b856549aa8deb9..3a49c5fb61cedbddfac4dc93aad1a33ecf20a918 100644
--- a/solutions/22_conv2d_solution.ipynb
+++ b/solutions/22_conv2d_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb)\n\n",
     "# Solution: 2D Convolution\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -79,4 +94,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/23_cross_attention_solution.ipynb b/solutions/23_cross_attention_solution.ipynb
index 16b70a33fc9f7966287328f30f2ee7923f479b46..bb7cc66158959982971d7674adfbfe1099d75a9a 100644
--- a/solutions/23_cross_attention_solution.ipynb
+++ b/solutions/23_cross_attention_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb)\n\n",
     "# Solution: Multi-Head Cross-Attention\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -88,4 +103,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/24_rope_solution.ipynb b/solutions/24_rope_solution.ipynb
index 01d3908c3c0103d39cdf870b4b781bb469564d2e..02829b3e4fa56ad9a58832d59c72ce1e9d4e8a83 100644
--- a/solutions/24_rope_solution.ipynb
+++ b/solutions/24_rope_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb)\n\n",
     "# Solution: Rotary Position Embedding (RoPE)\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -83,4 +98,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/25_flash_attention_solution.ipynb b/solutions/25_flash_attention_solution.ipynb
index 29f7579c0a9766a1959e5d4034b9d467f3aa6ed2..31135d6c7664054e2290ce9195bb7d78e304a82d 100644
--- a/solutions/25_flash_attention_solution.ipynb
+++ b/solutions/25_flash_attention_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb)\n\n",
     "# Solution: Flash Attention (Tiled)\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -92,4 +107,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/26_lora_solution.ipynb b/solutions/26_lora_solution.ipynb
index f99b35378d53e77317d3c6985d73b8e04b804a06..49e1e8c0b5c046bbde3b8e37d04e427ff42e4553 100644
--- a/solutions/26_lora_solution.ipynb
+++ b/solutions/26_lora_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb)\n\n",
     "# Solution: LoRA (Low-Rank Adaptation)\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -81,4 +96,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/27_vit_patch_solution.ipynb b/solutions/27_vit_patch_solution.ipynb
index 9dfdccf38d6ff54b903c6afa810a32f8c178f13a..73004c40b8df8bddcf5e2cdc0ac2b9711b8b4f1f 100644
--- a/solutions/27_vit_patch_solution.ipynb
+++ b/solutions/27_vit_patch_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb)\n\n",
     "# Solution: ViT Patch Embedding\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -81,4 +96,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/28_moe_solution.ipynb b/solutions/28_moe_solution.ipynb
index d12eb4a4a4ea6939568f4fbc3721df589a6af37a..5c1eb971bb38fd7d6305f7b2313192025ad8046d 100644
--- a/solutions/28_moe_solution.ipynb
+++ b/solutions/28_moe_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb)\n\n",
     "# Solution: Mixture of Experts (MoE)\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -94,4 +109,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/29_adam_solution.ipynb b/solutions/29_adam_solution.ipynb
index 3cb504c61a01c03165d8e97e0ad21175ddf7bc31..c31ae3bef7134766cb2a8f83eadeaf532584886c 100644
--- a/solutions/29_adam_solution.ipynb
+++ b/solutions/29_adam_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb)\n\n",
     "# Solution: Adam Optimizer\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -97,4 +112,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/30_cosine_lr_solution.ipynb b/solutions/30_cosine_lr_solution.ipynb
index 331d5c63df8702f60fb64a24e0e57fe1ca62a0fd..74924cd0c1cfefbc00645a80adc2a321d1ddab6f 100644
--- a/solutions/30_cosine_lr_solution.ipynb
+++ b/solutions/30_cosine_lr_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb)\n\n",
     "# Solution: Cosine LR Scheduler with Warmup\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -71,4 +86,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/31_gradient_accumulation_solution.ipynb b/solutions/31_gradient_accumulation_solution.ipynb
index 0e61737c2280fc251ac532576490ba2b96e9a827..c289074efa7c3b09a570b9586b58ba35e7434613 100644
--- a/solutions/31_gradient_accumulation_solution.ipynb
+++ b/solutions/31_gradient_accumulation_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb)\n\n",
     "# Solution: Gradient Accumulation\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -78,4 +93,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/32_topk_sampling_solution.ipynb b/solutions/32_topk_sampling_solution.ipynb
index a890bb487e253a1ded35229449ed342164643576..31de06714ab4d1763c879f834bf3845e63deda10 100644
--- a/solutions/32_topk_sampling_solution.ipynb
+++ b/solutions/32_topk_sampling_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb)\n\n",
     "# Solution: Top-k / Top-p Sampling\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -79,4 +94,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/33_beam_search_solution.ipynb b/solutions/33_beam_search_solution.ipynb
index ff4cf30a29af0e038f4e6d9b53b5e17096db7cb5..969edfa1fd571cbd47e1b31b4ee7dc357cac49f4 100644
--- a/solutions/33_beam_search_solution.ipynb
+++ b/solutions/33_beam_search_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb)\n\n",
     "# Solution: Beam Search Decoding\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -88,4 +103,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/34_speculative_decoding_solution.ipynb b/solutions/34_speculative_decoding_solution.ipynb
index 066a46abb6458e498b99fa461a8273d46317725d..c13674ae199fb1a6577025d9825a698bdc89fe24 100644
--- a/solutions/34_speculative_decoding_solution.ipynb
+++ b/solutions/34_speculative_decoding_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb)\n\n",
     "# Solution: Speculative Decoding\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -84,4 +99,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/35_bpe_solution.ipynb b/solutions/35_bpe_solution.ipynb
index 65be8b2d23417dd74ec3ec3d2991f2566435feee..1b0d1e55ea1cb334517557953496ce756bfae1cb 100644
--- a/solutions/35_bpe_solution.ipynb
+++ b/solutions/35_bpe_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb)\n\n",
     "# Solution: Byte-Pair Encoding (BPE)\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -113,4 +128,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/36_int8_quantization_solution.ipynb b/solutions/36_int8_quantization_solution.ipynb
index 8750818fce7100272be391a1df384f015d63ea9a..5a5e3ecaa44d44029ebc317ac8623aa6d9a87033 100644
--- a/solutions/36_int8_quantization_solution.ipynb
+++ b/solutions/36_int8_quantization_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb)\n\n",
     "# Solution: INT8 Quantized Linear\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -83,4 +98,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/37_dpo_loss_solution.ipynb b/solutions/37_dpo_loss_solution.ipynb
index b4bb454a7ec0f9b38f7a790ab586b784d32fd01b..0606b841f82a4543941a67be500c57be54ba07b7 100644
--- a/solutions/37_dpo_loss_solution.ipynb
+++ b/solutions/37_dpo_loss_solution.ipynb
@@ -4,12 +4,27 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb)\n\n",
     "# Solution: DPO (Direct Preference Optimization) Loss\n",
     "\n",
     "Reference solution."
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
@@ -73,4 +88,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/solutions/38_grpo_loss_solution.ipynb b/solutions/38_grpo_loss_solution.ipynb
index d18ac5dff1a8923212cf9b491d4dca3a828dad26..05ce2f62ae801ce5f1924f5a900bb5bc3d55f0da 100644
--- a/solutions/38_grpo_loss_solution.ipynb
+++ b/solutions/38_grpo_loss_solution.ipynb
@@ -4,11 +4,26 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb)\n\n",
     "# Solution: GRPO (Group Relative Policy Optimization) Loss\n",
     "\n",
     "Reference solution."
    ]
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/solutions/39_ppo_loss_solution.ipynb b/solutions/39_ppo_loss_solution.ipynb
index 62eef562bbc3240081d1bd42876c79a939dced76..1818a3277239c16929039bfe22442d0122e6544c 100644
--- a/solutions/39_ppo_loss_solution.ipynb
+++ b/solutions/39_ppo_loss_solution.ipynb
@@ -4,11 +4,26 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb)\n\n",
     "# Solution: PPO Clipped Loss\n",
     "\n",
     "Reference solution for the PPO clipped surrogate loss task.\n"
    ]
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "execution_count": null,
@@ -89,4 +104,3 @@
  "nbformat": 4,
  "nbformat_minor": 5
 }
-
diff --git a/solutions/40_linear_regression_solution.ipynb b/solutions/40_linear_regression_solution.ipynb
index 8ce35d9579af5d129f414c9dfad574e53fbb79cf..a7452934306e4076367f985695d53f02f98c1856 100644
--- a/solutions/40_linear_regression_solution.ipynb
+++ b/solutions/40_linear_regression_solution.ipynb
@@ -1,125 +1,140 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "# 🟡 Solution: Linear Regression\n",
-        "\n",
-        "Reference solution demonstrating closed-form, gradient descent, and nn.Linear approaches."
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn"
-      ],
-      "execution_count": null
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✅ SOLUTION\n",
-        "\n",
-        "class LinearRegression:\n",
-        "    def closed_form(self, X: torch.Tensor, y: torch.Tensor):\n",
-        "        \"\"\"Normal equation via augmented matrix.\"\"\"\n",
-        "        N, D = X.shape\n",
-        "        # Augment X with ones column for bias\n",
-        "        X_aug = torch.cat([X, torch.ones(N, 1)], dim=1)  # (N, D+1)\n",
-        "        # Solve (X^T X) theta = X^T y\n",
-        "        theta = torch.linalg.lstsq(X_aug, y).solution      # (D+1,)\n",
-        "        w = theta[:D]\n",
-        "        b = theta[D]\n",
-        "        return w.detach(), b.detach()\n",
-        "\n",
-        "    def gradient_descent(self, X: torch.Tensor, y: torch.Tensor,\n",
-        "                         lr: float = 0.01, steps: int = 1000):\n",
-        "        \"\"\"Manual gradient computation — no autograd.\"\"\"\n",
-        "        N, D = X.shape\n",
-        "        w = torch.zeros(D)\n",
-        "        b = torch.tensor(0.0)\n",
-        "\n",
-        "        for _ in range(steps):\n",
-        "            pred = X @ w + b          # (N,)\n",
-        "            error = pred - y           # (N,)\n",
-        "            grad_w = (2.0 / N) * (X.T @ error)  # (D,)\n",
-        "            grad_b = (2.0 / N) * error.sum()     # scalar\n",
-        "            w = w - lr * grad_w\n",
-        "            b = b - lr * grad_b\n",
-        "\n",
-        "        return w, b\n",
-        "\n",
-        "    def nn_linear(self, X: torch.Tensor, y: torch.Tensor,\n",
-        "                  lr: float = 0.01, steps: int = 1000):\n",
-        "        \"\"\"PyTorch nn.Linear with autograd training loop.\"\"\"\n",
-        "        N, D = X.shape\n",
-        "        layer = nn.Linear(D, 1)\n",
-        "        optimizer = torch.optim.SGD(layer.parameters(), lr=lr)\n",
-        "        loss_fn = nn.MSELoss()\n",
-        "\n",
-        "        for _ in range(steps):\n",
-        "            optimizer.zero_grad()\n",
-        "            pred = layer(X).squeeze(-1)  # (N,)\n",
-        "            loss = loss_fn(pred, y)\n",
-        "            loss.backward()\n",
-        "            optimizer.step()\n",
-        "\n",
-        "        w = layer.weight.data.squeeze(0)  # (D,)\n",
-        "        b = layer.bias.data.squeeze(0)    # scalar ()\n",
-        "        return w, b"
-      ],
-      "execution_count": null
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# Verify\n",
-        "torch.manual_seed(42)\n",
-        "X = torch.randn(100, 3)\n",
-        "true_w = torch.tensor([2.0, -1.0, 0.5])\n",
-        "y = X @ true_w + 3.0\n",
-        "\n",
-        "model = LinearRegression()\n",
-        "for name, method in [(\"Closed-form\", model.closed_form),\n",
-        "                      (\"Grad Descent\", lambda X, y: model.gradient_descent(X, y, lr=0.05, steps=2000)),\n",
-        "                      (\"nn.Linear\", lambda X, y: model.nn_linear(X, y, lr=0.05, steps=2000))]:\n",
-        "    w, b = method(X, y)\n",
-        "    print(f\"{name:13s}  w={w.tolist()}  b={b.item():.4f}\")\n",
-        "print(f\"{'True':13s}  w={true_w.tolist()}  b=3.0000\")"
-      ],
-      "execution_count": null
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✅ SUBMIT\n",
-        "from torch_judge import check\n",
-        "check(\"linear_regression\")"
-      ],
-      "execution_count": null
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python",
-      "version": "3.11.0"
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb)\n\n",
+    "# 🟡 Solution: Linear Regression\n",
+    "\n",
+    "Reference solution demonstrating closed-form, gradient descent, and nn.Linear approaches."
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 4
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn"
+   ],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✅ SOLUTION\n",
+    "\n",
+    "class LinearRegression:\n",
+    "    def closed_form(self, X: torch.Tensor, y: torch.Tensor):\n",
+    "        \"\"\"Normal equation via augmented matrix.\"\"\"\n",
+    "        N, D = X.shape\n",
+    "        # Augment X with ones column for bias\n",
+    "        X_aug = torch.cat([X, torch.ones(N, 1)], dim=1)  # (N, D+1)\n",
+    "        # Solve (X^T X) theta = X^T y\n",
+    "        theta = torch.linalg.lstsq(X_aug, y).solution      # (D+1,)\n",
+    "        w = theta[:D]\n",
+    "        b = theta[D]\n",
+    "        return w.detach(), b.detach()\n",
+    "\n",
+    "    def gradient_descent(self, X: torch.Tensor, y: torch.Tensor,\n",
+    "                         lr: float = 0.01, steps: int = 1000):\n",
+    "        \"\"\"Manual gradient computation — no autograd.\"\"\"\n",
+    "        N, D = X.shape\n",
+    "        w = torch.zeros(D)\n",
+    "        b = torch.tensor(0.0)\n",
+    "\n",
+    "        for _ in range(steps):\n",
+    "            pred = X @ w + b          # (N,)\n",
+    "            error = pred - y           # (N,)\n",
+    "            grad_w = (2.0 / N) * (X.T @ error)  # (D,)\n",
+    "            grad_b = (2.0 / N) * error.sum()     # scalar\n",
+    "            w = w - lr * grad_w\n",
+    "            b = b - lr * grad_b\n",
+    "\n",
+    "        return w, b\n",
+    "\n",
+    "    def nn_linear(self, X: torch.Tensor, y: torch.Tensor,\n",
+    "                  lr: float = 0.01, steps: int = 1000):\n",
+    "        \"\"\"PyTorch nn.Linear with autograd training loop.\"\"\"\n",
+    "        N, D = X.shape\n",
+    "        layer = nn.Linear(D, 1)\n",
+    "        optimizer = torch.optim.SGD(layer.parameters(), lr=lr)\n",
+    "        loss_fn = nn.MSELoss()\n",
+    "\n",
+    "        for _ in range(steps):\n",
+    "            optimizer.zero_grad()\n",
+    "            pred = layer(X).squeeze(-1)  # (N,)\n",
+    "            loss = loss_fn(pred, y)\n",
+    "            loss.backward()\n",
+    "            optimizer.step()\n",
+    "\n",
+    "        w = layer.weight.data.squeeze(0)  # (D,)\n",
+    "        b = layer.bias.data.squeeze(0)    # scalar ()\n",
+    "        return w, b"
+   ],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Verify\n",
+    "torch.manual_seed(42)\n",
+    "X = torch.randn(100, 3)\n",
+    "true_w = torch.tensor([2.0, -1.0, 0.5])\n",
+    "y = X @ true_w + 3.0\n",
+    "\n",
+    "model = LinearRegression()\n",
+    "for name, method in [(\"Closed-form\", model.closed_form),\n",
+    "                      (\"Grad Descent\", lambda X, y: model.gradient_descent(X, y, lr=0.05, steps=2000)),\n",
+    "                      (\"nn.Linear\", lambda X, y: model.nn_linear(X, y, lr=0.05, steps=2000))]:\n",
+    "    w, b = method(X, y)\n",
+    "    print(f\"{name:13s}  w={w.tolist()}  b={b.item():.4f}\")\n",
+    "print(f\"{'True':13s}  w={true_w.tolist()}  b=3.0000\")"
+   ],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✅ SUBMIT\n",
+    "from torch_judge import check\n",
+    "check(\"linear_regression\")"
+   ],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
 }
diff --git a/templates/00_welcome.ipynb b/templates/00_welcome.ipynb
index 231cb9f5bc373dcd7f79ec00c92e6b3319570622..c3498f57061bd40204da0d9bc3c886057ce6eb41 100644
--- a/templates/00_welcome.ipynb
+++ b/templates/00_welcome.ipynb
@@ -31,24 +31,120 @@
     "\n",
     "> 💡 Every notebook also has a **Colab** toolbar button and an **Open in Colab** badge — use them to run problems in Google Colab with zero setup.\n",
     "\n",
-    "## Quick Start"
+    "## Quick Start\n",
+    "\n",
+    "📖 **Reference solutions in Colab**: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb) — this badge opens the ReLU solution; for any other problem, use the **Colab** link in the Solution column of the table below."
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": null,
    "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
    "source": [
     "from torch_judge import status\n",
     "status()"
-   ],
-   "execution_count": null,
-   "outputs": []
+   ]
   },
   {
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Problem List (40 problems)\n\n### 🧱 Fundamentals — \"Implement X from scratch\"\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 1 | ReLU | 🟢 Easy | [Open](01_relu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">Colab</a> | [Open](01_relu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 2 | Softmax | 🟢 Easy | [Open](02_softmax.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">Colab</a> | [Open](02_softmax_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 16 | Cross-Entropy Loss | 🟢 Easy | [Open](16_cross_entropy.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">Colab</a> | [Open](16_cross_entropy_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 17 | Dropout | 🟢 Easy | [Open](17_dropout.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">Colab</a> | [Open](17_dropout_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 18 | Embedding | 🟢 Easy | [Open](18_embedding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">Colab</a> | [Open](18_embedding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 19 | GELU | 🟢 Easy | [Open](19_gelu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">Colab</a> | [Open](19_gelu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 
20 | Kaiming Init | 🟢 Easy | [Open](20_weight_init.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">Colab</a> | [Open](20_weight_init_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 21 | Gradient Clipping | 🟢 Easy | [Open](21_gradient_clipping.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">Colab</a> | [Open](21_gradient_clipping_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 31 | Gradient Accumulation | 🟢 Easy | [Open](31_gradient_accumulation.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">Colab</a> | [Open](31_gradient_accumulation_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 3 | Linear Layer | 🟡 Medium | [Open](03_linear.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">Colab</a> | [Open](03_linear_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 4 | LayerNorm | 🟡 Medium | [Open](04_layernorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">Colab</a> | [Open](04_layernorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 7 | BatchNorm | 🟡 Medium | [Open](07_batchnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](07_batchnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 8 | RMSNorm | 🟡 Medium | [Open](08_rmsnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](08_rmsnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 15 | SwiGLU MLP | 🟡 Medium | [Open](15_mlp.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">Colab</a> | [Open](15_mlp_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 22 | Conv2d | 🟡 Medium | [Open](22_conv2d.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">Colab</a> | [Open](22_conv2d_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🧠 Attention 
Mechanisms\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 23 | Cross-Attention | 🟡 Medium | [Open](23_cross_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](23_cross_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 5 | Scaled Dot-Product Attention | 🔴 Hard | [Open](05_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](05_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 6 | Multi-Head Attention | 🔴 Hard | [Open](06_multihead_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](06_multihead_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 9 | Causal Self-Attention | 🔴 Hard | [Open](09_causal_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](09_causal_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 10 | Grouped Query Attention | 🔴 Hard | [Open](10_gqa.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">Colab</a> | [Open](10_gqa_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 11 | Sliding Window Attention | 🔴 Hard | [Open](11_sliding_window.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">Colab</a> | [Open](11_sliding_window_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 12 | Linear Attention | 🔴 Hard | [Open](12_linear_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](12_linear_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 14 | KV Cache Attention | 🔴 Hard | [Open](14_kv_cache.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">Colab</a> | [Open](14_kv_cache_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 24 | RoPE | 🔴 Hard | [Open](24_rope.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">Colab</a> | [Open](24_rope_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 25 | Flash Attention | 🔴 Hard | [Open](25_flash_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](25_flash_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🏗️ Architecture & Adaptation\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 26 | LoRA | 🟡 Medium | [Open](26_lora.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">Colab</a> | [Open](26_lora_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 27 | ViT Patch Embedding | 🟡 Medium | [Open](27_vit_patch.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">Colab</a> | [Open](27_vit_patch_solution.ipynb) · <a 
href=\"https://github.com/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 13 | GPT-2 Block | 🔴 Hard | [Open](13_gpt2_block.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">Colab</a> | [Open](13_gpt2_block_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 28 | Mixture of Experts | 🔴 Hard | [Open](28_moe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">Colab</a> | [Open](28_moe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### ⚙️ Training & Optimization\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 29 | Adam Optimizer | 🟡 Medium | [Open](29_adam.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">Colab</a> | 
[Open](29_adam_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 30 | Cosine LR Scheduler | 🟡 Medium | [Open](30_cosine_lr.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">Colab</a> | [Open](30_cosine_lr_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 40 | Linear Regression | 🟡 Medium | [Open](40_linear_regression.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">Colab</a> | [Open](40_linear_regression_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🎯 Inference & Decoding\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 32 | Top-k / Top-p Sampling | 🟡 Medium | [Open](32_topk_sampling.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">GitHub</a> · <a 
href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">Colab</a> | [Open](32_topk_sampling_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 33 | Beam Search | 🟡 Medium | [Open](33_beam_search.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">Colab</a> | [Open](33_beam_search_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 34 | Speculative Decoding | 🔴 Hard | [Open](34_speculative_decoding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">Colab</a> | [Open](34_speculative_decoding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n### 🔬 Advanced\n\n| # | Problem | Difficulty | Template | Solution |\n|:---:|---------|:----------:|:--------:|:--------:|\n| 35 | BPE Tokenizer | 🔴 Hard | 
[Open](35_bpe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">Colab</a> | [Open](35_bpe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 36 | INT8 Quantization | 🔴 Hard | [Open](36_int8_quantization.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">Colab</a> | [Open](36_int8_quantization_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 37 | DPO Loss | 🔴 Hard | [Open](37_dpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](37_dpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 38 | GRPO Loss | 🔴 Hard | [Open](38_grpo_loss.ipynb) · <a 
href=\"https://github.com/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](38_grpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n| 39 | PPO Loss | 🔴 Hard | [Open](39_ppo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](39_ppo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n\n## Useful Commands\n\n```python\nfrom torch_judge import check, hint, status\n\nstatus()                   # Progress dashboard\ncheck(\"relu\")              # Judge your implementation\nhint(\"causal_attention\")   # Get a hint\n```"
+    "## Problem List (40 problems)\n",
+    "\n",
+    "### 🧱 Fundamentals — \"Implement X from scratch\"\n",
+    "\n",
+    "| # | Problem | Difficulty | Template | Solution |\n",
+    "|:---:|---------|:----------:|:--------:|:--------:|\n",
+    "| 1 | ReLU | 🟢 Easy | [Open](01_relu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/01_relu.ipynb\" target=\"_blank\">Colab</a> | [Open](01_relu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/01_relu_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 2 | Softmax | 🟢 Easy | [Open](02_softmax.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/02_softmax.ipynb\" target=\"_blank\">Colab</a> | [Open](02_softmax_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/02_softmax_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 16 | Cross-Entropy Loss | 🟢 Easy | [Open](16_cross_entropy.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/16_cross_entropy.ipynb\" target=\"_blank\">Colab</a> | [Open](16_cross_entropy_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/16_cross_entropy_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 17 | Dropout | 🟢 Easy | [Open](17_dropout.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/17_dropout.ipynb\" target=\"_blank\">Colab</a> | [Open](17_dropout_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/17_dropout_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 18 | Embedding | 🟢 Easy | [Open](18_embedding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/18_embedding.ipynb\" target=\"_blank\">Colab</a> | [Open](18_embedding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/18_embedding_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 19 | GELU | 🟢 Easy | [Open](19_gelu.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/19_gelu.ipynb\" target=\"_blank\">Colab</a> | [Open](19_gelu_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/19_gelu_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 20 | Kaiming Init | 🟢 Easy | [Open](20_weight_init.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/20_weight_init.ipynb\" target=\"_blank\">Colab</a> | [Open](20_weight_init_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/20_weight_init_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 21 | Gradient Clipping | 🟢 Easy | [Open](21_gradient_clipping.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/21_gradient_clipping.ipynb\" target=\"_blank\">Colab</a> | [Open](21_gradient_clipping_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/21_gradient_clipping_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 31 | Gradient Accumulation | 🟢 Easy | [Open](31_gradient_accumulation.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/31_gradient_accumulation.ipynb\" target=\"_blank\">Colab</a> | [Open](31_gradient_accumulation_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/31_gradient_accumulation_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 3 | Linear Layer | 🟡 Medium | [Open](03_linear.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/03_linear.ipynb\" target=\"_blank\">Colab</a> | [Open](03_linear_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/03_linear_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 4 | LayerNorm | 🟡 Medium | [Open](04_layernorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/04_layernorm.ipynb\" target=\"_blank\">Colab</a> | [Open](04_layernorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/04_layernorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 7 | BatchNorm | 🟡 Medium | [Open](07_batchnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](07_batchnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/07_batchnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 8 | RMSNorm | 🟡 Medium | [Open](08_rmsnorm.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/08_rmsnorm.ipynb\" target=\"_blank\">Colab</a> | [Open](08_rmsnorm_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/08_rmsnorm_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 15 | SwiGLU MLP | 🟡 Medium | [Open](15_mlp.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/15_mlp.ipynb\" target=\"_blank\">Colab</a> | [Open](15_mlp_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/15_mlp_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 22 | Conv2d | 🟡 Medium | [Open](22_conv2d.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/22_conv2d.ipynb\" target=\"_blank\">Colab</a> | [Open](22_conv2d_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/22_conv2d_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "\n",
+    "### 🧠 Attention Mechanisms\n",
+    "\n",
+    "| # | Problem | Difficulty | Template | Solution |\n",
+    "|:---:|---------|:----------:|:--------:|:--------:|\n",
+    "| 23 | Cross-Attention | 🟡 Medium | [Open](23_cross_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/23_cross_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](23_cross_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/23_cross_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 5 | Scaled Dot-Product Attention | 🔴 Hard | [Open](05_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/05_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](05_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/05_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 6 | Multi-Head Attention | 🔴 Hard | [Open](06_multihead_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/06_multihead_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](06_multihead_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/06_multihead_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 9 | Causal Self-Attention | 🔴 Hard | [Open](09_causal_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/09_causal_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](09_causal_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/09_causal_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 10 | Grouped Query Attention | 🔴 Hard | [Open](10_gqa.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/10_gqa.ipynb\" target=\"_blank\">Colab</a> | [Open](10_gqa_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/10_gqa_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 11 | Sliding Window Attention | 🔴 Hard | [Open](11_sliding_window.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/11_sliding_window.ipynb\" target=\"_blank\">Colab</a> | [Open](11_sliding_window_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/11_sliding_window_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 12 | Linear Attention | 🔴 Hard | [Open](12_linear_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/12_linear_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](12_linear_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/12_linear_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 14 | KV Cache Attention | 🔴 Hard | [Open](14_kv_cache.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/14_kv_cache.ipynb\" target=\"_blank\">Colab</a> | [Open](14_kv_cache_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/14_kv_cache_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 24 | RoPE | 🔴 Hard | [Open](24_rope.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/24_rope.ipynb\" target=\"_blank\">Colab</a> | [Open](24_rope_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/24_rope_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 25 | Flash Attention | 🔴 Hard | [Open](25_flash_attention.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/25_flash_attention.ipynb\" target=\"_blank\">Colab</a> | [Open](25_flash_attention_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/25_flash_attention_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "\n",
+    "### 🏗️ Architecture & Adaptation\n",
+    "\n",
+    "| # | Problem | Difficulty | Template | Solution |\n",
+    "|:---:|---------|:----------:|:--------:|:--------:|\n",
+    "| 26 | LoRA | 🟡 Medium | [Open](26_lora.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/26_lora.ipynb\" target=\"_blank\">Colab</a> | [Open](26_lora_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/26_lora_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 27 | ViT Patch Embedding | 🟡 Medium | [Open](27_vit_patch.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/27_vit_patch.ipynb\" target=\"_blank\">Colab</a> | [Open](27_vit_patch_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/27_vit_patch_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 13 | GPT-2 Block | 🔴 Hard | [Open](13_gpt2_block.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/13_gpt2_block.ipynb\" target=\"_blank\">Colab</a> | [Open](13_gpt2_block_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/13_gpt2_block_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 28 | Mixture of Experts | 🔴 Hard | [Open](28_moe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/28_moe.ipynb\" target=\"_blank\">Colab</a> | [Open](28_moe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/28_moe_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "\n",
+    "### ⚙️ Training & Optimization\n",
+    "\n",
+    "| # | Problem | Difficulty | Template | Solution |\n",
+    "|:---:|---------|:----------:|:--------:|:--------:|\n",
+    "| 29 | Adam Optimizer | 🟡 Medium | [Open](29_adam.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/29_adam.ipynb\" target=\"_blank\">Colab</a> | [Open](29_adam_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/29_adam_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 30 | Cosine LR Scheduler | 🟡 Medium | [Open](30_cosine_lr.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/30_cosine_lr.ipynb\" target=\"_blank\">Colab</a> | [Open](30_cosine_lr_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/30_cosine_lr_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 40 | Linear Regression | 🟡 Medium | [Open](40_linear_regression.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb\" target=\"_blank\">Colab</a> | [Open](40_linear_regression_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/40_linear_regression_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "\n",
+    "### 🎯 Inference & Decoding\n",
+    "\n",
+    "| # | Problem | Difficulty | Template | Solution |\n",
+    "|:---:|---------|:----------:|:--------:|:--------:|\n",
+    "| 32 | Top-k / Top-p Sampling | 🟡 Medium | [Open](32_topk_sampling.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/32_topk_sampling.ipynb\" target=\"_blank\">Colab</a> | [Open](32_topk_sampling_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/32_topk_sampling_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 33 | Beam Search | 🟡 Medium | [Open](33_beam_search.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/33_beam_search.ipynb\" target=\"_blank\">Colab</a> | [Open](33_beam_search_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/33_beam_search_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 34 | Speculative Decoding | 🔴 Hard | [Open](34_speculative_decoding.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/34_speculative_decoding.ipynb\" target=\"_blank\">Colab</a> | [Open](34_speculative_decoding_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/34_speculative_decoding_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "\n",
+    "### 🔬 Advanced\n",
+    "\n",
+    "| # | Problem | Difficulty | Template | Solution |\n",
+    "|:---:|---------|:----------:|:--------:|:--------:|\n",
+    "| 35 | BPE Tokenizer | 🔴 Hard | [Open](35_bpe.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/35_bpe.ipynb\" target=\"_blank\">Colab</a> | [Open](35_bpe_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/35_bpe_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 36 | INT8 Quantization | 🔴 Hard | [Open](36_int8_quantization.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/36_int8_quantization.ipynb\" target=\"_blank\">Colab</a> | [Open](36_int8_quantization_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/36_int8_quantization_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 37 | DPO Loss | 🔴 Hard | [Open](37_dpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/37_dpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](37_dpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/37_dpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 38 | GRPO Loss | 🔴 Hard | [Open](38_grpo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](38_grpo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/38_grpo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "| 39 | PPO Loss | 🔴 Hard | [Open](39_ppo_loss.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb\" target=\"_blank\">Colab</a> | [Open](39_ppo_loss_solution.ipynb) · <a href=\"https://github.com/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">GitHub</a> · <a href=\"https://colab.research.google.com/github/duoan/TorchCode/blob/master/solutions/39_ppo_loss_solution.ipynb\" target=\"_blank\">Colab</a> |\n",
+    "\n",
+    "## Useful Commands\n",
+    "\n",
+    "```python\n",
+    "from torch_judge import check, hint, status\n",
+    "\n",
+    "status()                   # Progress dashboard\n",
+    "check(\"relu\")              # Judge your implementation\n",
+    "hint(\"causal_attention\")   # Get a hint\n",
+    "```"
    ]
   }
  ],
@@ -65,4 +161,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 4
-}
\ No newline at end of file
+}
diff --git a/templates/01_relu.ipynb b/templates/01_relu.ipynb
index 7d5b2339b3d9acc8f9518bbf2b31e077b977d445..abe82e0fe5813e0686d6a6493975813ba58cd811 100644
--- a/templates/01_relu.ipynb
+++ b/templates/01_relu.ipynb
@@ -30,6 +30,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/02_softmax.ipynb b/templates/02_softmax.ipynb
index d43498de168172c67622f76c039d3e699177fa2d..b8ed6f88c79770a3af47c48123b8640d13d5b499 100644
--- a/templates/02_softmax.ipynb
+++ b/templates/02_softmax.ipynb
@@ -30,6 +30,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/03_linear.ipynb b/templates/03_linear.ipynb
index 95d05b30bfb6592a49efba04ccb05c62c817b2e4..29e46641738f0ba25df9ad2adf69e3f7287d4650 100644
--- a/templates/03_linear.ipynb
+++ b/templates/03_linear.ipynb
@@ -26,6 +26,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/04_layernorm.ipynb b/templates/04_layernorm.ipynb
index 4da15071f3f90ca716315f364bc198610f298914..89f030baebe3ebfef1724914f8cc62055ec4566c 100644
--- a/templates/04_layernorm.ipynb
+++ b/templates/04_layernorm.ipynb
@@ -32,6 +32,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/05_attention.ipynb b/templates/05_attention.ipynb
index cfad95858fbd75f0fa9a5b4b7d85a38d5e4afa41..047243ee33c18b674d114479227819a49ad146dc 100644
--- a/templates/05_attention.ipynb
+++ b/templates/05_attention.ipynb
@@ -29,6 +29,20 @@
     "- Must handle cross-attention (seq_q ≠ seq_k)"
    ]
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/templates/06_multihead_attention.ipynb b/templates/06_multihead_attention.ipynb
index c51e43a1d187253a299d50e16c0e84d5df120b96..78127148c82c65562b6b97c661f9f8b19c9f70d2 100644
--- a/templates/06_multihead_attention.ipynb
+++ b/templates/06_multihead_attention.ipynb
@@ -37,6 +37,21 @@
     "5. Output projection: `self.W_o(concat)`"
    ]
   },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "02a059c4",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ]
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/templates/07_batchnorm.ipynb b/templates/07_batchnorm.ipynb
index 5307942a86cfde76c51f03f0bb70b3c56b048b55..8b1c7221deda0ce80cbac734a2bf13510409854a 100644
--- a/templates/07_batchnorm.ipynb
+++ b/templates/07_batchnorm.ipynb
@@ -1,131 +1,145 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "id": "89fd15cb",
-      "metadata": {},
-      "source": [
-        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n",
-        "\n",
-        "# 🟡 Medium: Implement BatchNorm\n",
-        "\n",
-        "Implement **Batch Normalization** with both **training** and **inference** behavior.\n",
-        "\n",
-        "In training mode, use **batch statistics** and update running estimates:\n",
-        "\n",
-        "$$\\text{BN}(x) = \\gamma \\cdot \\frac{x - \\mu_B}{\\sqrt{\\sigma_B^2 + \\epsilon}} + \\beta$$\n",
-        "\n",
-        "where $\\mu_B$ and $\\sigma_B^2$ are the mean and variance computed **across the batch** (dim=0).\n",
-        "\n",
-        "In inference mode, use the provided **running mean/var** instead of current batch stats.\n",
-        "\n",
-        "### Signature\n",
-        "```python\n",
-        "def my_batch_norm(\n",
-        "    x: torch.Tensor,\n",
-        "    gamma: torch.Tensor,\n",
-        "    beta: torch.Tensor,\n",
-        "    running_mean: torch.Tensor,\n",
-        "    running_var: torch.Tensor,\n",
-        "    eps: float = 1e-5,\n",
-        "    momentum: float = 0.1,\n",
-        "    training: bool = True,\n",
-        ") -> torch.Tensor:\n",
-        "    # x: (N, D) — normalize each feature across all samples in the batch\n",
-        "    # running_mean, running_var: updated in-place during training; used as-is during inference\n",
-        "```\n",
-        "\n",
-        "### Rules\n",
-        "- Do **NOT** use `F.batch_norm`, `nn.BatchNorm1d`, etc.\n",
-        "- Compute batch mean and variance over `dim=0` with `unbiased=False`\n",
-        "- Update running stats like PyTorch: `running = (1 - momentum) * running + momentum * batch_stat`\n",
-        "- Use `running_mean` / `running_var` for inference when `training=False`\n",
-        "- Must support autograd w.r.t. `x`, `gamma`, `beta`（running statistics 应视作 buffer，而不是需要梯度的参数）"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import torch"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "d946ca79",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✏️ YOUR IMPLEMENTATION HERE\n",
-        "\n",
-        "def my_batch_norm(\n",
-        "    x,\n",
-        "    gamma,\n",
-        "    beta,\n",
-        "    running_mean,\n",
-        "    running_var,\n",
-        "    eps=1e-5,\n",
-        "    momentum=0.1,\n",
-        "    training=True,\n",
-        "):\n",
-        "    pass  # Replace this"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "26b93e71",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# 🧪 Debug\n",
-        "x = torch.randn(8, 4)\n",
-        "gamma = torch.ones(4)\n",
-        "beta = torch.zeros(4)\n",
-        "\n",
-        "# Running stats typically live on the same device and shape as features\n",
-        "running_mean = torch.zeros(4)\n",
-        "running_var = torch.ones(4)\n",
-        "\n",
-        "# Training mode: uses batch stats and updates running_mean / running_var\n",
-        "out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
-        "print(\"[Train] Output shape:\", out_train.shape)\n",
-        "print(\"[Train] Column means:\", out_train.mean(dim=0))   # should be ~0\n",
-        "print(\"[Train] Column stds: \", out_train.std(dim=0))    # should be ~1\n",
-        "print(\"Updated running_mean:\", running_mean)\n",
-        "print(\"Updated running_var:\", running_var)\n",
-        "\n",
-        "# Inference mode: uses running_mean / running_var only\n",
-        "out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
-        "print(\"[Eval] Output shape:\", out_eval.shape)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✅ SUBMIT\n",
-        "from torch_judge import check\n",
-        "check(\"batchnorm\")"
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python",
-      "version": "3.11.0"
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "89fd15cb",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/07_batchnorm.ipynb)\n",
+    "\n",
+    "# 🟡 Medium: Implement BatchNorm\n",
+    "\n",
+    "Implement **Batch Normalization** with both **training** and **inference** behavior.\n",
+    "\n",
+    "In training mode, use **batch statistics** and update running estimates:\n",
+    "\n",
+    "$$\\text{BN}(x) = \\gamma \\cdot \\frac{x - \\mu_B}{\\sqrt{\\sigma_B^2 + \\epsilon}} + \\beta$$\n",
+    "\n",
+    "where $\\mu_B$ and $\\sigma_B^2$ are the mean and variance computed **across the batch** (dim=0).\n",
+    "\n",
+    "In inference mode, use the provided **running mean/var** instead of current batch stats.\n",
+    "\n",
+    "### Signature\n",
+    "```python\n",
+    "def my_batch_norm(\n",
+    "    x: torch.Tensor,\n",
+    "    gamma: torch.Tensor,\n",
+    "    beta: torch.Tensor,\n",
+    "    running_mean: torch.Tensor,\n",
+    "    running_var: torch.Tensor,\n",
+    "    eps: float = 1e-5,\n",
+    "    momentum: float = 0.1,\n",
+    "    training: bool = True,\n",
+    ") -> torch.Tensor:\n",
+    "    # x: (N, D) — normalize each feature across all samples in the batch\n",
+    "    # running_mean, running_var: updated in-place during training; used as-is during inference\n",
+    "```\n",
+    "\n",
+    "### Rules\n",
+    "- Do **NOT** use `F.batch_norm`, `nn.BatchNorm1d`, etc.\n",
+    "- Compute batch mean and variance over `dim=0` with `unbiased=False`\n",
+    "- Update running stats like PyTorch: `running = (1 - momentum) * running + momentum * batch_stat`\n",
+    "- Use `running_mean` / `running_var` for inference when `training=False`\n",
+    "- Must support autograd w.r.t. `x`, `gamma`, `beta` (running statistics should be treated as buffers, not as parameters that require gradients)"
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 5
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d946ca79",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✏️ YOUR IMPLEMENTATION HERE\n",
+    "\n",
+    "def my_batch_norm(\n",
+    "    x,\n",
+    "    gamma,\n",
+    "    beta,\n",
+    "    running_mean,\n",
+    "    running_var,\n",
+    "    eps=1e-5,\n",
+    "    momentum=0.1,\n",
+    "    training=True,\n",
+    "):\n",
+    "    pass  # Replace this"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "26b93e71",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 🧪 Debug\n",
+    "x = torch.randn(8, 4)\n",
+    "gamma = torch.ones(4)\n",
+    "beta = torch.zeros(4)\n",
+    "\n",
+    "# Running stats typically live on the same device and shape as features\n",
+    "running_mean = torch.zeros(4)\n",
+    "running_var = torch.ones(4)\n",
+    "\n",
+    "# Training mode: uses batch stats and updates running_mean / running_var\n",
+    "out_train = my_batch_norm(x, gamma, beta, running_mean, running_var, training=True)\n",
+    "print(\"[Train] Output shape:\", out_train.shape)\n",
+    "print(\"[Train] Column means:\", out_train.mean(dim=0))   # should be ~0\n",
+    "print(\"[Train] Column stds: \", out_train.std(dim=0))    # should be ~1\n",
+    "print(\"Updated running_mean:\", running_mean)\n",
+    "print(\"Updated running_var:\", running_var)\n",
+    "\n",
+    "# Inference mode: uses running_mean / running_var only\n",
+    "out_eval = my_batch_norm(x, gamma, beta, running_mean, running_var, training=False)\n",
+    "print(\"[Eval] Output shape:\", out_eval.shape)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✅ SUBMIT\n",
+    "from torch_judge import check\n",
+    "check(\"batchnorm\")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }
diff --git a/templates/08_rmsnorm.ipynb b/templates/08_rmsnorm.ipynb
index 35930811e2a97ee01eb3a59ee91b84974df3a5d8..facfbf5393c0034bb3220d7d45f47c95a15f3a1b 100644
--- a/templates/08_rmsnorm.ipynb
+++ b/templates/08_rmsnorm.ipynb
@@ -38,6 +38,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/09_causal_attention.ipynb b/templates/09_causal_attention.ipynb
index 0fb2059b0726b7ee95f8b76b1dc0554d80279fd2..24bdb6c4b24f290afb2c6526f0f139ebd3e6c6de 100644
--- a/templates/09_causal_attention.ipynb
+++ b/templates/09_causal_attention.ipynb
@@ -40,6 +40,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/10_gqa.ipynb b/templates/10_gqa.ipynb
index b503113047fee5020cf82fe339e7083d838807b4..92d544da838216c7390987a67daf6050dfe47dc5 100644
--- a/templates/10_gqa.ipynb
+++ b/templates/10_gqa.ipynb
@@ -43,6 +43,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/11_sliding_window.ipynb b/templates/11_sliding_window.ipynb
index 57a0e9d75516176f1e04bf567b322ff54f313b37..d201fb1a32b483d4172d7a5420a4490b64d56d4e 100644
--- a/templates/11_sliding_window.ipynb
+++ b/templates/11_sliding_window.ipynb
@@ -40,6 +40,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/12_linear_attention.ipynb b/templates/12_linear_attention.ipynb
index e1cd6faa1168b617eeedcc43d15aa59f7fd707d1..7d6ceeb51d5f0a76a6794fc2b8ebe4596bf1b8b0 100644
--- a/templates/12_linear_attention.ipynb
+++ b/templates/12_linear_attention.ipynb
@@ -47,6 +47,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/13_gpt2_block.ipynb b/templates/13_gpt2_block.ipynb
index 714a108e72360a03132309b3b543070a161b351f..3211781378cf53f5932ce069d9495b5e2baa7895 100644
--- a/templates/13_gpt2_block.ipynb
+++ b/templates/13_gpt2_block.ipynb
@@ -47,6 +47,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/14_kv_cache.ipynb b/templates/14_kv_cache.ipynb
index 2e1cd206704ec8f453928a215209390d5d5ef7bc..a7d9e0bee45a4a98ef825ac4afff8b6593cdc7a8 100644
--- a/templates/14_kv_cache.ipynb
+++ b/templates/14_kv_cache.ipynb
@@ -39,6 +39,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/15_mlp.ipynb b/templates/15_mlp.ipynb
index a07525a5ae3061e0657d2d0c1e33f6255b2b5de7..0d238faecd07359070a096a1101b9c97ecf193e1 100644
--- a/templates/15_mlp.ipynb
+++ b/templates/15_mlp.ipynb
@@ -35,6 +35,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/16_cross_entropy.ipynb b/templates/16_cross_entropy.ipynb
index d20e2c4247d5d95204c3cd21bd24238d2b27bb26..80b77651306233146da9e628e2f9290772eba1ed 100644
--- a/templates/16_cross_entropy.ipynb
+++ b/templates/16_cross_entropy.ipynb
@@ -25,6 +25,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/17_dropout.ipynb b/templates/17_dropout.ipynb
index c6efad070cabd6adfd7d73c5753134a9b7de4473..d2ec346ed38fe401f39bbc70944ea9d709e02fd1 100644
--- a/templates/17_dropout.ipynb
+++ b/templates/17_dropout.ipynb
@@ -24,6 +24,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/18_embedding.ipynb b/templates/18_embedding.ipynb
index 9e376a51ca0a441c7be4c05744f242ebfa3be7bc..1dc318ca1c67d25a15eca7051bce4968f25282fd 100644
--- a/templates/18_embedding.ipynb
+++ b/templates/18_embedding.ipynb
@@ -24,6 +24,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/19_gelu.ipynb b/templates/19_gelu.ipynb
index 29c5b1180438e9a6ef1dfb6bbb870a48fb3f534b..613c65c9319910560c15d18a7c9a7552bc923c6b 100644
--- a/templates/19_gelu.ipynb
+++ b/templates/19_gelu.ipynb
@@ -23,6 +23,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/20_weight_init.ipynb b/templates/20_weight_init.ipynb
index 0f4e12ef7572f69c9c13e5949cdf91f297325352..2a3534379151b08d2c36d7ceed5f4cb4e16f3ea0 100644
--- a/templates/20_weight_init.ipynb
+++ b/templates/20_weight_init.ipynb
@@ -22,6 +22,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/21_gradient_clipping.ipynb b/templates/21_gradient_clipping.ipynb
index 3f6c74075c652a186bb6dd3b288424b2624d38e8..4f783f8f875ef69b8a48b8e805fcf9caf852ab17 100644
--- a/templates/21_gradient_clipping.ipynb
+++ b/templates/21_gradient_clipping.ipynb
@@ -24,6 +24,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/22_conv2d.ipynb b/templates/22_conv2d.ipynb
index c8a997c640cb108aef6c520b0fb10e06d356ab58..9f5c0cd5870260b6c99ddb4923d105ed145c46a2 100644
--- a/templates/22_conv2d.ipynb
+++ b/templates/22_conv2d.ipynb
@@ -24,6 +24,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/23_cross_attention.ipynb b/templates/23_cross_attention.ipynb
index fc1616f11ae9dbe933e685588c9fbb1ad8bad378..246728587881efc37df2dfe2bcafc94b9952835d 100644
--- a/templates/23_cross_attention.ipynb
+++ b/templates/23_cross_attention.ipynb
@@ -25,6 +25,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/24_rope.ipynb b/templates/24_rope.ipynb
index 07bd771e74a3cf8a62b2f49fc93dfb380a26f1ef..d4ffd7c6f89892ff6c13675fea88fc62e55d57bf 100644
--- a/templates/24_rope.ipynb
+++ b/templates/24_rope.ipynb
@@ -26,6 +26,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/25_flash_attention.ipynb b/templates/25_flash_attention.ipynb
index 4404bd96a4a0e43a1397df7fd147a2d9a80813fc..249cd2ad582824f6c1e4ef887ada16ee41993bd5 100644
--- a/templates/25_flash_attention.ipynb
+++ b/templates/25_flash_attention.ipynb
@@ -28,6 +28,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/26_lora.ipynb b/templates/26_lora.ipynb
index 5040e47aa79d41fbfc9c15c4b251327e28aa6037..eaf7f692766cae547ff05cc59f068dab011e3d12 100644
--- a/templates/26_lora.ipynb
+++ b/templates/26_lora.ipynb
@@ -27,6 +27,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/27_vit_patch.ipynb b/templates/27_vit_patch.ipynb
index 2e0fbe429f6f6d97e61ab30fe5b3c96b2716b386..17acd59cf2cf883fddc6e916813b164b83c0f586 100644
--- a/templates/27_vit_patch.ipynb
+++ b/templates/27_vit_patch.ipynb
@@ -26,6 +26,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/28_moe.ipynb b/templates/28_moe.ipynb
index 61f187ee3bb3c046da95dafc3cba3b7959e26716..f50fe3324863f5aff5c475f284692ab2e4929ebe 100644
--- a/templates/28_moe.ipynb
+++ b/templates/28_moe.ipynb
@@ -25,6 +25,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/29_adam.ipynb b/templates/29_adam.ipynb
index 1e4b2369e7516123ad653d3434c0e243fe0a1aac..a3c1d639d9908681583d7b4bccd0de557595c382 100644
--- a/templates/29_adam.ipynb
+++ b/templates/29_adam.ipynb
@@ -29,6 +29,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/30_cosine_lr.ipynb b/templates/30_cosine_lr.ipynb
index c24e05e1af32dee7f4e4d5c014efe69b27d7d38b..108f579cb15f54eb0d8451e37892742bb8781fbf 100644
--- a/templates/30_cosine_lr.ipynb
+++ b/templates/30_cosine_lr.ipynb
@@ -24,6 +24,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/31_gradient_accumulation.ipynb b/templates/31_gradient_accumulation.ipynb
index 1aea7c63db9d9ca367eeafb5c09ade325da70360..78e205e397cc029b472dea0f5e411e311ffc61d9 100644
--- a/templates/31_gradient_accumulation.ipynb
+++ b/templates/31_gradient_accumulation.ipynb
@@ -27,6 +27,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/32_topk_sampling.ipynb b/templates/32_topk_sampling.ipynb
index 318889a35668c97c2cf6c3d9c8f84ca20cd757de..bbb788e3509eb640dd003256a6ecb0f72459c3ef 100644
--- a/templates/32_topk_sampling.ipynb
+++ b/templates/32_topk_sampling.ipynb
@@ -25,6 +25,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/33_beam_search.ipynb b/templates/33_beam_search.ipynb
index c4675dab574e6a61c22cda8ca699549015ea6d10..7735f71ae8656c736bb9439629b1e5d62c9bdfed 100644
--- a/templates/33_beam_search.ipynb
+++ b/templates/33_beam_search.ipynb
@@ -25,6 +25,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/34_speculative_decoding.ipynb b/templates/34_speculative_decoding.ipynb
index a8e171f75f2e8fff1f3f06cf8a1a8bbc635bed8c..5b8e6923f320f6fe066d407a97689953c0231f1f 100644
--- a/templates/34_speculative_decoding.ipynb
+++ b/templates/34_speculative_decoding.ipynb
@@ -27,6 +27,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/35_bpe.ipynb b/templates/35_bpe.ipynb
index 078457c53b12a7c29dff4797ea7aeaddfcf26fc3..9adfca0c7df6a540fd5ac0c68b414d607276f18d 100644
--- a/templates/35_bpe.ipynb
+++ b/templates/35_bpe.ipynb
@@ -26,6 +26,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/36_int8_quantization.ipynb b/templates/36_int8_quantization.ipynb
index 4f4fc8c22b4ec6ef0b4f3820b0b2f6fba1336e0a..0c3516ae13fbd5b22497a846a16268f03922f12d 100644
--- a/templates/36_int8_quantization.ipynb
+++ b/templates/36_int8_quantization.ipynb
@@ -25,6 +25,20 @@
    ],
    "outputs": []
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "metadata": {},
diff --git a/templates/37_dpo_loss.ipynb b/templates/37_dpo_loss.ipynb
index a00ff9b3914247d90ce98f01f25970bb48dc83c1..38ea5b528fd2ac9c6aea39e9c9f27e67fee97a61 100644
--- a/templates/37_dpo_loss.ipynb
+++ b/templates/37_dpo_loss.ipynb
@@ -21,6 +21,20 @@
     "```"
    ]
   },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
   {
    "cell_type": "code",
    "execution_count": null,
diff --git a/templates/38_grpo_loss.ipynb b/templates/38_grpo_loss.ipynb
index c752afbf537e45cb0f002a79a8f62505e3da8b9b..546f212c9b91bf4f4af2421f6200ffd818d9f7e3 100644
--- a/templates/38_grpo_loss.ipynb
+++ b/templates/38_grpo_loss.ipynb
@@ -1,101 +1,115 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "id": "968cc37c",
-      "metadata": {},
-      "source": [
-        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb)\n",
-        "\n",
-        "# 🔴 Hard: GRPO Loss\n",
-        "\n",
-        "Implement the **Group Relative Policy Optimization (GRPO)** loss — a group-wise, baseline-subtracted REINFORCE objective commonly used in RLAIF (reinforcement learning from AI feedback).\n",
-        "\n",
-        "Given a batch of log-probabilities, scalar rewards, and group ids (one group per prompt), define the within-group normalized advantages:\n",
-        "\n",
-        "$$A_i = \\frac{r_i - \\bar r_{g(i)}}{\\text{std}_{g(i)} + \\epsilon}$$\n",
-        "\n",
-        "where \\(\\bar r_{g(i)}\\) and \\(\\text{std}_{g(i)}\\) are the mean and standard deviation of rewards in the group of example \\(i\\).\n",
-        "\n",
-        "The GRPO loss is then the negative advantage-weighted log-probability:\n",
-        "\n",
-        "$$\\mathcal{L}_{\\text{GRPO}} = -\\mathbb{E}_i \\big[\\,\\text{stop\\_grad}(A_i)\\, \\log \\pi_\\theta(y_i)\\big].$$\n",
-        "\n",
-        "### Signature\n",
-        "```python\n",
-        "from torch import Tensor\n",
-        "\n",
-        "def grpo_loss(logps: Tensor, rewards: Tensor, group_ids: Tensor,\n",
-        "              eps: float = 1e-5) -> Tensor:\n",
-        "    \"\"\"GRPO loss over a batch.\n",
-        "\n",
-        "    logps: (B,) policy log-probs for each sampled response\n",
-        "    rewards: (B,) scalar rewards for each response\n",
-        "    group_ids: (B,) integers, same id = same prompt/group\n",
-        "    returns: scalar loss (Tensor)\n",
-        "    \"\"\"\n",
-        "```"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "d1038dfe",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import torch\n",
-        "import torch.nn.functional as F"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "68d0bd84",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✏️ YOUR IMPLEMENTATION HERE\n",
-        "\n",
-        "from torch import Tensor\n",
-        "\n",
-        "def grpo_loss(logps: Tensor, rewards: Tensor, group_ids: Tensor,\n",
-        "              eps: float = 1e-5) -> Tensor:\n",
-        "    pass  # compute normalized advantages per group and return -mean(adv.detach() * logps)"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "eb215c40",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# 🧪 Debug\n",
-        "logps = torch.tensor([0.0, -0.5, -1.0, -1.5])\n",
-        "rewards = torch.tensor([1.0, 0.8, 0.2, 0.0])\n",
-        "group_ids = torch.tensor([0, 0, 1, 1])\n",
-        "print('Loss:', grpo_loss(logps, rewards, group_ids).item())"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "id": "95b2e29e",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✅ SUBMIT\n",
-        "from torch_judge import check\n",
-        "check('grpo_loss')"
-      ]
-    }
-  ],
-  "metadata": {
-    "language_info": {
-      "name": "python"
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "968cc37c",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/38_grpo_loss.ipynb)\n",
+    "\n",
+    "# 🔴 Hard: GRPO Loss\n",
+    "\n",
+    "Implement the **Group Relative Policy Optimization (GRPO)** loss — a group-wise, baseline-subtracted REINFORCE objective commonly used in RLAIF (reinforcement learning from AI feedback).\n",
+    "\n",
+    "Given a batch of log-probabilities, scalar rewards, and group ids (one group per prompt), define the within-group normalized advantages:\n",
+    "\n",
+    "$$A_i = \\frac{r_i - \\bar r_{g(i)}}{\\text{std}_{g(i)} + \\epsilon}$$\n",
+    "\n",
+    "where $\\bar r_{g(i)}$ and $\\text{std}_{g(i)}$ are the mean and standard deviation of rewards in the group of example $i$.\n",
+    "\n",
+    "The GRPO loss is then the negative advantage-weighted log-probability:\n",
+    "\n",
+    "$$\\mathcal{L}_{\\text{GRPO}} = -\\mathbb{E}_i \\big[\\,\\text{stop\\_grad}(A_i)\\, \\log \\pi_\\theta(y_i)\\big].$$\n",
+    "\n",
+    "### Signature\n",
+    "```python\n",
+    "from torch import Tensor\n",
+    "\n",
+    "def grpo_loss(logps: Tensor, rewards: Tensor, group_ids: Tensor,\n",
+    "              eps: float = 1e-5) -> Tensor:\n",
+    "    \"\"\"GRPO loss over a batch.\n",
+    "\n",
+    "    logps: (B,) policy log-probs for each sampled response\n",
+    "    rewards: (B,) scalar rewards for each response\n",
+    "    group_ids: (B,) integers, same id = same prompt/group\n",
+    "    returns: scalar loss (Tensor)\n",
+    "    \"\"\"\n",
+    "```"
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 5
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "d1038dfe",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn.functional as F"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "68d0bd84",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✏️ YOUR IMPLEMENTATION HERE\n",
+    "\n",
+    "from torch import Tensor\n",
+    "\n",
+    "def grpo_loss(logps: Tensor, rewards: Tensor, group_ids: Tensor,\n",
+    "              eps: float = 1e-5) -> Tensor:\n",
+    "    pass  # compute normalized advantages per group and return -mean(adv.detach() * logps)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "eb215c40",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 🧪 Debug\n",
+    "logps = torch.tensor([0.0, -0.5, -1.0, -1.5])\n",
+    "rewards = torch.tensor([1.0, 0.8, 0.2, 0.0])\n",
+    "group_ids = torch.tensor([0, 0, 1, 1])\n",
+    "print('Loss:', grpo_loss(logps, rewards, group_ids).item())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "95b2e29e",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✅ SUBMIT\n",
+    "from torch_judge import check\n",
+    "check('grpo_loss')"
+   ]
+  }
+ ],
+ "metadata": {
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }
diff --git a/templates/39_ppo_loss.ipynb b/templates/39_ppo_loss.ipynb
index ddf107adea4fd18c780d9429619a865b06142e14..795857182dbbabc83bbe79871a6326e75aad0293 100644
--- a/templates/39_ppo_loss.ipynb
+++ b/templates/39_ppo_loss.ipynb
@@ -1,106 +1,120 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "id": "6674fa96",
-      "metadata": {},
-      "source": [
-        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb)\n",
-        "\n",
-        "# 🔴 Hard: PPO Clipped Loss\n",
-        "\n",
-        "Implement the **PPO (Proximal Policy Optimization)** **clipped surrogate loss**.\n",
-        "\n",
-        "Given:\n",
-        "- `new_logps`: current policy log-probs $(B,)$\n",
-        "- `old_logps`: old policy log-probs $(B,)$\n",
-        "- `advantages`: advantage estimates $(B,)$\n",
-        "\n",
-        "Define the ratio\n",
-        "\n",
-        "$$ r_i = \\exp(\\text{new\\_logps}_i - \\text{old\\_logps}_i). $$\n",
-        "\n",
-        "Then compute\n",
-        "- $L^{\\text{unclipped}}_i = r_i A_i$\n",
-        "- $L^{\\text{clipped}}_i = \\operatorname{clip}(r_i, 1-\\epsilon, 1+\\epsilon) A_i$\n",
-        "\n",
-        "The loss is the negative batch mean of the elementwise minimum:\n",
-        "\n",
-        "$$\n",
-        "\\mathcal{L}_\\text{PPO} = -\\mathbb{E}_i\\big[\\min(L^{\\text{unclipped}}_i, L^{\\text{clipped}}_i)\\big].\n",
-        "$$\n",
-        "\n",
-        "Implementation notes: detach `old_logps` and `advantages` so gradients only flow through `new_logps`.\n",
-        "\n",
-        "### Signature\n",
-        "```python\n",
-        "from torch import Tensor\n",
-        "\n",
-        "def ppo_loss(new_logps: Tensor, old_logps: Tensor, advantages: Tensor,\n",
-        "             clip_ratio: float = 0.2) -> Tensor:\n",
-        "    \"\"\"PPO clipped surrogate loss over a batch.\"\"\"\n",
-        "```\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import torch\n",
-        "import torch.nn.functional as F\n",
-        "from torch import Tensor\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✏️ YOUR IMPLEMENTATION HERE\n",
-        "\n",
-        "def ppo_loss(new_logps: Tensor, old_logps: Tensor, advantages: Tensor,\n",
-        "             clip_ratio: float = 0.2) -> Tensor:\n",
-        "    pass  # -mean(min(r * adv, clamp(r, 1-clip, 1+clip) * adv)) with gradients only through new_logps\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# 🧪 Debug\n",
-        "new_logps = torch.tensor([0.0, -0.2, -0.4, -0.6])\n",
-        "old_logps = torch.tensor([0.0, -0.1, -0.5, -0.5])\n",
-        "advantages = torch.tensor([1.0, -1.0, 0.5, -0.5])\n",
-        "print('Loss:', ppo_loss(new_logps, old_logps, advantages, clip_ratio=0.2))\n"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "execution_count": null,
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✅ SUBMIT\n",
-        "from torch_judge import check\n",
-        "check('ppo_loss')\n"
-      ]
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python"
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "id": "6674fa96",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/39_ppo_loss.ipynb)\n",
+    "\n",
+    "# 🔴 Hard: PPO Clipped Loss\n",
+    "\n",
+    "Implement the **PPO (Proximal Policy Optimization)** **clipped surrogate loss**.\n",
+    "\n",
+    "Given:\n",
+    "- `new_logps`: current policy log-probs $(B,)$\n",
+    "- `old_logps`: old policy log-probs $(B,)$\n",
+    "- `advantages`: advantage estimates $(B,)$\n",
+    "\n",
+    "Define the ratio\n",
+    "\n",
+    "$$ r_i = \\exp(\\text{new\\_logps}_i - \\text{old\\_logps}_i). $$\n",
+    "\n",
+    "Then compute\n",
+    "- $L^{\\text{unclipped}}_i = r_i A_i$\n",
+    "- $L^{\\text{clipped}}_i = \\operatorname{clip}(r_i, 1-\\epsilon, 1+\\epsilon) A_i$\n",
+    "\n",
+    "The loss is the negative batch mean of the elementwise minimum:\n",
+    "\n",
+    "$$\n",
+    "\\mathcal{L}_\\text{PPO} = -\\mathbb{E}_i\\big[\\min(L^{\\text{unclipped}}_i, L^{\\text{clipped}}_i)\\big].\n",
+    "$$\n",
+    "\n",
+    "Implementation notes: detach `old_logps` and `advantages` so gradients only flow through `new_logps`.\n",
+    "\n",
+    "### Signature\n",
+    "```python\n",
+    "from torch import Tensor\n",
+    "\n",
+    "def ppo_loss(new_logps: Tensor, old_logps: Tensor, advantages: Tensor,\n",
+    "             clip_ratio: float = 0.2) -> Tensor:\n",
+    "    \"\"\"PPO clipped surrogate loss over a batch.\"\"\"\n",
+    "```\n"
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 5
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn.functional as F\n",
+    "from torch import Tensor\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✏️ YOUR IMPLEMENTATION HERE\n",
+    "\n",
+    "def ppo_loss(new_logps: Tensor, old_logps: Tensor, advantages: Tensor,\n",
+    "             clip_ratio: float = 0.2) -> Tensor:\n",
+    "    pass  # -mean(min(r * adv, clamp(r, 1-clip, 1+clip) * adv)) with gradients only through new_logps\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 🧪 Debug\n",
+    "new_logps = torch.tensor([0.0, -0.2, -0.4, -0.6])\n",
+    "old_logps = torch.tensor([0.0, -0.1, -0.5, -0.5])\n",
+    "advantages = torch.tensor([1.0, -1.0, 0.5, -0.5])\n",
+    "print('Loss:', ppo_loss(new_logps, old_logps, advantages, clip_ratio=0.2))\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✅ SUBMIT\n",
+    "from torch_judge import check\n",
+    "check('ppo_loss')\n"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
 }
diff --git a/templates/40_linear_regression.ipynb b/templates/40_linear_regression.ipynb
index 1c66090ca5e8376088a99cb02434c24a9937730a..1d099e7b728ea1437d80e60146b0a94ecc4e8d62 100644
--- a/templates/40_linear_regression.ipynb
+++ b/templates/40_linear_regression.ipynb
@@ -1,142 +1,156 @@
 {
-  "cells": [
-    {
-      "cell_type": "markdown",
-      "metadata": {},
-      "source": [
-        "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb)\n",
-        "\n",
-        "# 🟡 Medium: Linear Regression\n",
-        "\n",
-        "Implement **linear regression** using three different approaches — all in pure PyTorch.\n",
-        "\n",
-        "Given data `X` of shape `(N, D)` and targets `y` of shape `(N,)`, find weight `w` of shape `(D,)` and bias `b` (scalar) such that:\n",
-        "\n",
-        "$$\\hat{y} = Xw + b$$\n",
-        "\n",
-        "### Signature\n",
-        "```python\n",
-        "class LinearRegression:\n",
-        "    def closed_form(self, X: Tensor, y: Tensor) -> tuple[Tensor, Tensor]: ...\n",
-        "    def gradient_descent(self, X: Tensor, y: Tensor, lr=0.01, steps=1000) -> tuple[Tensor, Tensor]: ...\n",
-        "    def nn_linear(self, X: Tensor, y: Tensor, lr=0.01, steps=1000) -> tuple[Tensor, Tensor]: ...\n",
-        "```\n",
-        "\n",
-        "All methods return `(w, b)` where `w` has shape `(D,)` and `b` has shape `()`.\n",
-        "\n",
-        "### Method 1 — Closed-Form (Normal Equation)\n",
-        "Augment X with a ones column, then solve:\n",
-        "\n",
-        "$$\\theta = (X_{aug}^T X_{aug})^{-1} X_{aug}^T y$$\n",
-        "\n",
-        "Or use `torch.linalg.lstsq` / `torch.linalg.solve`.\n",
-        "\n",
-        "### Method 2 — Gradient Descent from Scratch\n",
-        "Initialize `w` and `b` to zeros. Repeat for `steps` iterations:\n",
-        "```\n",
-        "pred = X @ w + b\n",
-        "error = pred - y\n",
-        "grad_w = (2/N) * X^T @ error\n",
-        "grad_b = (2/N) * error.sum()\n",
-        "w -= lr * grad_w\n",
-        "b -= lr * grad_b\n",
-        "```\n",
-        "\n",
-        "### Method 3 — PyTorch nn.Linear\n",
-        "Create `nn.Linear(D, 1)`, use `nn.MSELoss` and an optimizer (e.g., `torch.optim.SGD`).\n",
-        "After training, extract `w` and `b` from the layer.\n",
-        "\n",
-        "### Rules\n",
-        "- All inputs and outputs must be **PyTorch tensors**\n",
-        "- Do **NOT** use numpy or sklearn\n",
-        "- `closed_form` must not use iterative optimization\n",
-        "- `gradient_descent` must manually compute gradients (no `autograd`)\n",
-        "- `nn_linear` should use `torch.nn.Linear` and `loss.backward()`"
-      ]
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "import torch\n",
-        "import torch.nn as nn"
-      ],
-      "execution_count": null
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✏️ YOUR IMPLEMENTATION HERE\n",
-        "\n",
-        "class LinearRegression:\n",
-        "    def closed_form(self, X: torch.Tensor, y: torch.Tensor):\n",
-        "        \"\"\"Normal equation: w = (X^T X)^{-1} X^T y\"\"\"\n",
-        "        pass  # Return (w, b)\n",
-        "\n",
-        "    def gradient_descent(self, X: torch.Tensor, y: torch.Tensor,\n",
-        "                         lr: float = 0.01, steps: int = 1000):\n",
-        "        \"\"\"Manual gradient descent loop\"\"\"\n",
-        "        pass  # Return (w, b)\n",
-        "\n",
-        "    def nn_linear(self, X: torch.Tensor, y: torch.Tensor,\n",
-        "                  lr: float = 0.01, steps: int = 1000):\n",
-        "        \"\"\"Train nn.Linear with autograd\"\"\"\n",
-        "        pass  # Return (w, b)"
-      ],
-      "execution_count": null
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# 🧪 Debug\n",
-        "torch.manual_seed(42)\n",
-        "X = torch.randn(100, 3)\n",
-        "true_w = torch.tensor([2.0, -1.0, 0.5])\n",
-        "y = X @ true_w + 3.0\n",
-        "\n",
-        "model = LinearRegression()\n",
-        "\n",
-        "w_cf, b_cf = model.closed_form(X, y)\n",
-        "print(f\"Closed-form:  w={w_cf}, b={b_cf.item():.4f}\")\n",
-        "\n",
-        "w_gd, b_gd = model.gradient_descent(X, y, lr=0.05, steps=2000)\n",
-        "print(f\"Grad descent: w={w_gd}, b={b_gd.item():.4f}\")\n",
-        "\n",
-        "w_nn, b_nn = model.nn_linear(X, y, lr=0.05, steps=2000)\n",
-        "print(f\"nn.Linear:    w={w_nn}, b={b_nn.item():.4f}\")\n",
-        "\n",
-        "print(f\"\\nTrue:         w={true_w}, b=3.0\")"
-      ],
-      "execution_count": null
-    },
-    {
-      "cell_type": "code",
-      "metadata": {},
-      "outputs": [],
-      "source": [
-        "# ✅ SUBMIT\n",
-        "from torch_judge import check\n",
-        "check(\"linear_regression\")"
-      ],
-      "execution_count": null
-    }
-  ],
-  "metadata": {
-    "kernelspec": {
-      "display_name": "Python 3",
-      "language": "python",
-      "name": "python3"
-    },
-    "language_info": {
-      "name": "python",
-      "version": "3.11.0"
-    }
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/duoan/TorchCode/blob/master/templates/40_linear_regression.ipynb)\n",
+    "\n",
+    "# 🟡 Medium: Linear Regression\n",
+    "\n",
+    "Implement **linear regression** using three different approaches — all in pure PyTorch.\n",
+    "\n",
+    "Given data `X` of shape `(N, D)` and targets `y` of shape `(N,)`, find weight `w` of shape `(D,)` and bias `b` (scalar) such that:\n",
+    "\n",
+    "$$\\hat{y} = Xw + b$$\n",
+    "\n",
+    "### Signature\n",
+    "```python\n",
+    "class LinearRegression:\n",
+    "    def closed_form(self, X: Tensor, y: Tensor) -> tuple[Tensor, Tensor]: ...\n",
+    "    def gradient_descent(self, X: Tensor, y: Tensor, lr=0.01, steps=1000) -> tuple[Tensor, Tensor]: ...\n",
+    "    def nn_linear(self, X: Tensor, y: Tensor, lr=0.01, steps=1000) -> tuple[Tensor, Tensor]: ...\n",
+    "```\n",
+    "\n",
+    "All methods return `(w, b)` where `w` has shape `(D,)` and `b` has shape `()`.\n",
+    "\n",
+    "### Method 1 — Closed-Form (Normal Equation)\n",
+    "Augment X with a ones column, then solve:\n",
+    "\n",
+    "$$\\theta = (X_{aug}^T X_{aug})^{-1} X_{aug}^T y$$\n",
+    "\n",
+    "Or use `torch.linalg.lstsq` / `torch.linalg.solve`.\n",
+    "\n",
+    "### Method 2 — Gradient Descent from Scratch\n",
+    "Initialize `w` and `b` to zeros. Repeat for `steps` iterations:\n",
+    "```\n",
+    "pred = X @ w + b\n",
+    "error = pred - y\n",
+    "grad_w = (2/N) * X^T @ error\n",
+    "grad_b = (2/N) * error.sum()\n",
+    "w -= lr * grad_w\n",
+    "b -= lr * grad_b\n",
+    "```\n",
+    "\n",
+    "### Method 3 — PyTorch nn.Linear\n",
+    "Create `nn.Linear(D, 1)`, use `nn.MSELoss` and an optimizer (e.g., `torch.optim.SGD`).\n",
+    "After training, extract `w` and `b` from the layer.\n",
+    "\n",
+    "### Rules\n",
+    "- All inputs and outputs must be **PyTorch tensors**\n",
+    "- Do **NOT** use numpy or sklearn\n",
+    "- `closed_form` must not use iterative optimization\n",
+    "- `gradient_descent` must manually compute gradients (no `autograd`)\n",
+    "- `nn_linear` should use `torch.nn.Linear` and `loss.backward()`"
+   ]
   },
-  "nbformat": 4,
-  "nbformat_minor": 4
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "source": [
+    "# Install torch-judge in Colab (no-op in JupyterLab/Docker)\n",
+    "try:\n",
+    "    import google.colab\n",
+    "    get_ipython().run_line_magic('pip', 'install -q torch-judge')\n",
+    "except ImportError:\n",
+    "    pass\n"
+   ],
+   "outputs": [],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import torch\n",
+    "import torch.nn as nn"
+   ],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✏️ YOUR IMPLEMENTATION HERE\n",
+    "\n",
+    "class LinearRegression:\n",
+    "    def closed_form(self, X: torch.Tensor, y: torch.Tensor):\n",
+    "        \"\"\"Normal equation: w = (X^T X)^{-1} X^T y\"\"\"\n",
+    "        pass  # Return (w, b)\n",
+    "\n",
+    "    def gradient_descent(self, X: torch.Tensor, y: torch.Tensor,\n",
+    "                         lr: float = 0.01, steps: int = 1000):\n",
+    "        \"\"\"Manual gradient descent loop\"\"\"\n",
+    "        pass  # Return (w, b)\n",
+    "\n",
+    "    def nn_linear(self, X: torch.Tensor, y: torch.Tensor,\n",
+    "                  lr: float = 0.01, steps: int = 1000):\n",
+    "        \"\"\"Train nn.Linear with autograd\"\"\"\n",
+    "        pass  # Return (w, b)"
+   ],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# 🧪 Debug\n",
+    "torch.manual_seed(42)\n",
+    "X = torch.randn(100, 3)\n",
+    "true_w = torch.tensor([2.0, -1.0, 0.5])\n",
+    "y = X @ true_w + 3.0\n",
+    "\n",
+    "model = LinearRegression()\n",
+    "\n",
+    "w_cf, b_cf = model.closed_form(X, y)\n",
+    "print(f\"Closed-form:  w={w_cf}, b={b_cf.item():.4f}\")\n",
+    "\n",
+    "w_gd, b_gd = model.gradient_descent(X, y, lr=0.05, steps=2000)\n",
+    "print(f\"Grad descent: w={w_gd}, b={b_gd.item():.4f}\")\n",
+    "\n",
+    "w_nn, b_nn = model.nn_linear(X, y, lr=0.05, steps=2000)\n",
+    "print(f\"nn.Linear:    w={w_nn}, b={b_nn.item():.4f}\")\n",
+    "\n",
+    "print(f\"\\nTrue:         w={true_w}, b=3.0\")"
+   ],
+   "execution_count": null
+  },
+  {
+   "cell_type": "code",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# ✅ SUBMIT\n",
+    "from torch_judge import check\n",
+    "check(\"linear_regression\")"
+   ],
+   "execution_count": null
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "name": "python",
+   "version": "3.11.0"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
 }