asdf98
/

BokehFlow

@@ -5,17 +5,18 @@
    "metadata": {},
    "source": [
     "# 🎬 BokehFlow Training Notebook\n",
-    "## Zero-download streaming — starts training in ~5 seconds\n",
     "\n",
-    "**How it works:** Metadata (3960 tiny JSONs) fetched async in 3s. Images streamed on-demand via HTTP during training. **Zero disk usage, zero wait.**\n",
     "\n",
-    "| Platform | GPU | Batch/s | Notes |\n",
-    "|----------|-----|---------|-------|\n",
-    "| Colab Free | T4 16GB | ~2-3s | 4 workers, prefetch hides latency |\n",
-    "| Kaggle | 2×T4 | ~1.5s | DataParallel + 8 workers |\n",
-    "| Colab Pro | A100 | ~1s | 8 workers |\n",
     "\n",
-    "**Just run all cells. No config changes needed.**"
    ]
   },
   {
@@ -24,8 +25,8 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#@title Step 0: Install (15s)\n",
-    "!pip install -q torch torchvision Pillow huggingface_hub tqdm aiohttp"
    ]
   },
   {
@@ -34,10 +35,10 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#@title Step 1: Download BokehFlow model code (2s)\n",
     "from huggingface_hub import hf_hub_download\n",
     "hf_hub_download(repo_id='asdf98/BokehFlow', filename='bokehflow.py', local_dir='.')\n",
-    "print('✓ BokehFlow downloaded')"
    ]
   },
   {
@@ -48,26 +49,33 @@
    "source": [
     "#@title Step 2: Config\n",
     "CONFIG = {\n",
     "    'variant': 'nano',        # 'nano'=583K, 'small'=3.1M, 'base'=12M\n",
     "    'batch_size': 4,          # 4 for T4, 8 for A100\n",
-    "    'crop_size': 256,         # Training crop size\n",
-    "    'num_epochs': 5,\n",
     "    'lr': 3e-4,\n",
     "    'weight_decay': 0.05,\n",
     "    'max_grad_norm': 1.0,\n",
-    "    'num_workers': 4,         # 4 for Colab, 8 for Kaggle\n",
-    "    'target_fstop': 2.0,      # Train on max bokeh (f/2.0)\n",
-    "    'max_samples': None,      # None=all 3958, or set 200 for quick test\n",
     "    'output_dir': './checkpoints',\n",
     "}\n",
     "\n",
-    "import torch\n",
     "NUM_GPUS = torch.cuda.device_count()\n",
     "DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
     "print(f'Device: {DEVICE}' + (f' ({torch.cuda.get_device_name(0)})' if torch.cuda.is_available() else ''))\n",
     "if NUM_GPUS > 1:\n",
-    "    CONFIG['num_workers'] = 8\n",
-    "    print(f'Kaggle dual-GPU detected → {NUM_GPUS} GPUs, {CONFIG[\"num_workers\"]} workers')"
    ]
   },
   {
@@ -76,88 +84,158 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#@title Step 3: Streaming Dataset — NO download, starts in ~3s\n",
-    "import asyncio, aiohttp, json, io, os, random, time, requests\n",
-    "from PIL import Image\n",
-    "from torch.utils.data import Dataset, DataLoader\n",
-    "from torchvision import transforms\n",
-    "from concurrent.futures import ThreadPoolExecutor\n",
     "\n",
     "HF_BASE = 'https://huggingface.co/datasets/timseizinger/RealBokeh_3MP/resolve/main'\n",
     "\n",
-    "# ---- Async metadata fetch (3960 JSONs in ~3s) ----\n",
-    "async def _fetch_all_metadata(split='train', concurrency=50):\n",
-    "    split_counts = {'train': 3960, 'validation': 220, 'test': 220}\n",
-    "    n = split_counts.get(split, 220)\n",
-    "    async def fetch_one(session, sem, sid):\n",
     "        async with sem:\n",
-    "            url = f'{HF_BASE}/{split}/metadata/{sid}.json'\n",
     "            try:\n",
     "                async with session.get(url) as r:\n",
-    "                    if r.status == 200:\n",
-    "                        return await r.json(content_type=None)\n",
-    "            except:\n",
-    "                pass\n",
     "            return None\n",
-    "    sem = asyncio.Semaphore(concurrency)\n",
-    "    conn = aiohttp.TCPConnector(limit=concurrency, force_close=False)\n",
-    "    async with aiohttp.ClientSession(connector=conn) as session:\n",
-    "        results = await asyncio.gather(*[fetch_one(session, sem, i) for i in range(1, n+1)])\n",
-    "    return [r for r in results if r is not None]\n",
-    "\n",
-    "def _build_pairs(metas, split, target_fstop=None):\n",
-    "    pairs = []\n",
-    "    for m in metas:\n",
-    "        for tgt_path, tgt_av in zip(m['target_images'], m['target_avs']):\n",
-    "            if target_fstop is not None and abs(tgt_av - target_fstop) > 0.05:\n",
-    "                continue\n",
-    "            pairs.append({\n",
-    "                'input_path': f\"{split}/{m['source_image']}\",\n",
-    "                'gt_path':    f'{split}/{tgt_path}',\n",
-    "                'f_number':   tgt_av,\n",
-    "                'focal_mm':   float(m.get('focal_length', 50)),\n",
-    "                'focus_m':    float(m.get('focus_plane_distance', 2.0)),\n",
-    "            })\n",
-    "    return pairs\n",
-    "\n",
-    "def _fetch_img(path):\n",
-    "    \"\"\"HTTP fetch image → PIL. No disk write.\"\"\"\n",
-    "    r = requests.get(f'{HF_BASE}/{path}', timeout=30)\n",
     "    r.raise_for_status()\n",
-    "    return Image.open(io.BytesIO(r.content)).convert('RGB')\n",
-    "\n",
-    "class RealBokehStream(Dataset):\n",
-    "    \"\"\"Streaming dataset. Zero disk. Images fetched on-demand via HTTP.\"\"\"\n",
-    "    def __init__(self, split='train', crop_size=256, target_fstop=2.0, max_samples=None):\n",
-    "        t0 = time.time()\n",
-    "        # Async fetch all metadata (~3s)\n",
-    "        try:\n",
-    "            loop = asyncio.get_event_loop()\n",
-    "            if loop.is_running():  # Colab/Jupyter has running loop\n",
-    "                import nest_asyncio; nest_asyncio.apply()\n",
-    "        except RuntimeError:\n",
-    "            pass\n",
-    "        metas = asyncio.run(_fetch_all_metadata(split))\n",
-    "        self.pairs = _build_pairs(metas, split, target_fstop)\n",
-    "        random.shuffle(self.pairs)\n",
-    "        if max_samples:\n",
-    "            self.pairs = self.pairs[:max_samples]\n",
     "        self.crop_size = crop_size\n",
     "        self.to_tensor = transforms.ToTensor()\n",
-    "        print(f'  {split}: {len(self.pairs)} pairs ready in {time.time()-t0:.1f}s (zero disk)')\n",
     "\n",
     "    def __len__(self):\n",
     "        return len(self.pairs)\n",
     "\n",
     "    def __getitem__(self, idx):\n",
     "        p = self.pairs[idx]\n",
-    "        # Fetch input + GT concurrently (2 threads)\n",
-    "        with ThreadPoolExecutor(2) as ex:\n",
-    "            f1 = ex.submit(_fetch_img, p['input_path'])\n",
-    "            f2 = ex.submit(_fetch_img, p['gt_path'])\n",
-    "            inp, gt = f1.result(), f2.result()\n",
     "\n",
-    "        # Synchronized random crop + flip on both images\n",
     "        cs = self.crop_size\n",
     "        w, h = inp.size\n",
     "        if w >= cs and h >= cs:\n",
@@ -174,56 +252,26 @@
     "        return {\n",
     "            'input':  self.to_tensor(inp),\n",
     "            'target': self.to_tensor(gt),\n",
-    "            'f_number':        torch.tensor(p['f_number'], dtype=torch.float32),\n",
-    "            'focal_length_mm': torch.tensor(p['focal_mm'], dtype=torch.float32),\n",
-    "            'focus_distance_m':torch.tensor(p['focus_m'],  dtype=torch.float32),\n",
     "        }\n",
     "\n",
-    "# ---- Create dataset + loader ----\n",
-    "print('Fetching metadata (no images downloaded yet)...')\n",
-    "try:\n",
-    "    import nest_asyncio; nest_asyncio.apply()  # needed for Jupyter\n",
-    "except ImportError:\n",
-    "    !pip install -q nest_asyncio\n",
-    "    import nest_asyncio; nest_asyncio.apply()\n",
-    "\n",
-    "train_ds = RealBokehStream(\n",
-    "    split='train',\n",
-    "    crop_size=CONFIG['crop_size'],\n",
-    "    target_fstop=CONFIG['target_fstop'],\n",
-    "    max_samples=CONFIG['max_samples'],\n",
-    ")\n",
-    "\n",
     "train_loader = DataLoader(\n",
     "    train_ds,\n",
     "    batch_size=CONFIG['batch_size'],\n",
     "    shuffle=True,\n",
     "    num_workers=CONFIG['num_workers'],\n",
-    "    prefetch_factor=2,\n",
-    "    persistent_workers=True,\n",
     "    drop_last=True,\n",
     ")\n",
-    "print(f'✓ DataLoader: {len(train_loader)} batches/epoch, {CONFIG[\"num_workers\"]} workers')\n",
-    "print(f'  Images streamed on-the-fly. Disk usage: 0 MB')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#@title Step 4: Sanity check — fetch 1 batch\n",
-    "import time\n",
-    "t0 = time.time()\n",
     "batch = next(iter(train_loader))\n",
-    "t1 = time.time()\n",
-    "print(f'First batch fetched in {t1-t0:.1f}s')\n",
-    "print(f'  input:  {batch[\"input\"].shape}')\n",
-    "print(f'  target: {batch[\"target\"].shape}')\n",
-    "print(f'  f_number: {batch[\"f_number\"]}')\n",
-    "print(f'  focal_mm: {batch[\"focal_length_mm\"]}')\n",
-    "print(f'  focus_m:  {batch[\"focus_distance_m\"]}')"
    ]
   },
   {
@@ -233,18 +281,16 @@
    "outputs": [],
    "source": [
     "#@title Step 5: Create model\n",
-    "from bokehflow import BokehFlow, BokehFlowConfig, BokehFlowLoss, model_summary\n",
     "\n",
     "config = BokehFlowConfig(variant=CONFIG['variant'])\n",
     "model = BokehFlow(config)\n",
-    "\n",
     "if NUM_GPUS > 1:\n",
     "    model = torch.nn.DataParallel(model)\n",
-    "    print(f'DataParallel on {NUM_GPUS} GPUs')\n",
     "model = model.to(DEVICE)\n",
     "\n",
-    "total_params = sum(p.numel() for p in model.parameters())\n",
-    "print(f'\\n✓ BokehFlow-{CONFIG[\"variant\"].capitalize()}: {total_params:,} params on {DEVICE}')"
    ]
   },
   {
@@ -253,27 +299,23 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#@title Step 6: Train!\n",
-    "from tqdm.auto import tqdm\n",
-    "import torch.nn.functional as F\n",
-    "\n",
     "optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=CONFIG['weight_decay'])\n",
-    "scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['num_epochs'] * len(train_loader))\n",
     "criterion = BokehFlowLoss(lambda_depth=0.5)\n",
     "os.makedirs(CONFIG['output_dir'], exist_ok=True)\n",
     "\n",
-    "print(f'Training: {CONFIG[\"num_epochs\"]} epochs × {len(train_loader)} batches')\n",
-    "print(f'Images streamed from HF Hub — no disk needed\\n')\n",
     "\n",
     "for epoch in range(CONFIG['num_epochs']):\n",
     "    model.train()\n",
-    "    running_loss = 0.0\n",
-    "    t_epoch = time.time()\n",
     "    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{CONFIG[\"num_epochs\"]}')\n",
     "\n",
-    "    for step, batch in enumerate(pbar):\n",
-    "        inp = batch['input'].to(DEVICE)\n",
-    "        tgt = batch['target'].to(DEVICE)\n",
     "        f_num = batch['f_number'].to(DEVICE)\n",
     "        focal = batch['focal_length_mm'].to(DEVICE)\n",
     "        focus = batch['focus_distance_m'].to(DEVICE)\n",
@@ -288,20 +330,19 @@
     "        optimizer.step()\n",
     "        scheduler.step()\n",
     "\n",
-    "        running_loss += loss.item()\n",
     "        pbar.set_postfix(loss=f'{loss.item():.4f}', lr=f'{scheduler.get_last_lr()[0]:.1e}')\n",
     "\n",
-    "    avg = running_loss / len(train_loader)\n",
-    "    elapsed = time.time() - t_epoch\n",
-    "    print(f'  → avg_loss={avg:.4f}  time={elapsed:.0f}s  ({elapsed/len(train_loader):.1f}s/batch)')\n",
     "\n",
-    "    # Save checkpoint\n",
     "    state = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()\n",
     "    ckpt = f'{CONFIG[\"output_dir\"]}/bokehflow_{CONFIG[\"variant\"]}_ep{epoch+1}.pt'\n",
-    "    torch.save({'epoch': epoch+1, 'model': state, 'loss': avg, 'config': CONFIG}, ckpt)\n",
-    "    print(f'  ✓ Saved {ckpt}')\n",
     "\n",
-    "print(f'\\n✓ Training complete!')"
    ]
   },
   {
@@ -310,61 +351,32 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#@title Step 7: Visualize result\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "model.eval()\n",
-    "sample = train_ds[0]\n",
     "with torch.no_grad():\n",
     "    out = model(\n",
-    "        sample['input'].unsqueeze(0).to(DEVICE),\n",
-    "        sample['f_number'].unsqueeze(0).to(DEVICE),\n",
-    "        sample['focal_length_mm'].unsqueeze(0).to(DEVICE),\n",
-    "        sample['focus_distance_m'].unsqueeze(0).to(DEVICE),\n",
     "    )\n",
     "\n",
-    "fig, axes = plt.subplots(1, 3, figsize=(15, 5))\n",
-    "axes[0].imshow(sample['input'].permute(1,2,0).cpu().numpy())\n",
-    "axes[0].set_title('Input (f/22 sharp)')\n",
-    "axes[1].imshow(out['bokeh'][0].permute(1,2,0).cpu().clamp(0,1).numpy())\n",
-    "axes[1].set_title('BokehFlow output')\n",
-    "axes[2].imshow(sample['target'].permute(1,2,0).cpu().numpy())\n",
-    "axes[2].set_title('Ground truth (f/2.0)')\n",
-    "for ax in axes: ax.axis('off')\n",
-    "plt.tight_layout()\n",
-    "plt.savefig('result.png', dpi=100, bbox_inches='tight')\n",
-    "plt.show()\n",
     "print('✓ Done!')"
    ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#@title (Optional) Push trained model to HuggingFace Hub\n",
-    "# from huggingface_hub import HfApi, login\n",
-    "# login()  # paste your HF token\n",
-    "# api = HfApi()\n",
-    "# api.upload_file(\n",
-    "#     path_or_fileobj=f'{CONFIG[\"output_dir\"]}/bokehflow_{CONFIG[\"variant\"]}_ep{CONFIG[\"num_epochs\"]}.pt',\n",
-    "#     path_in_repo=f'checkpoints/bokehflow_{CONFIG[\"variant\"]}.pt',\n",
-    "#     repo_id='YOUR_USERNAME/BokehFlow-trained',\n",
-    "# )"
-   ]
   }
  ],
  "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "name": "python",
-   "version": "3.10.0"
-  },
   "accelerator": "GPU"
  },
  "nbformat": 4,

    "metadata": {},
    "source": [
     "# 🎬 BokehFlow Training Notebook\n",
+    "## Smart download: only f/2.0 pairs, parallel, with resume\n",
     "\n",
+    "**Downloads only what's needed:**\n",
+    "| Subset | Files | Size | Download Time |\n",
+    "|--------|-------|------|---------------|\n",
+    "| 200 scenes | 400 images | ~234 MB | ~2 min |\n",
+    "| 500 scenes | 1000 images | ~586 MB | ~4 min |\n",
+    "| All 3958 | 7916 images | ~4.5 GB | ~25 min |\n",
     "\n",
+    "Default: **500 scenes (~586MB)**. Cached — re-running skips downloaded files.\n",
     "\n",
+    "**Just run all cells.**"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 0: Install\n",
+    "!pip install -q torch torchvision Pillow huggingface_hub tqdm aiohttp nest_asyncio"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 1: Download BokehFlow code\n",
     "from huggingface_hub import hf_hub_download\n",
     "hf_hub_download(repo_id='asdf98/BokehFlow', filename='bokehflow.py', local_dir='.')\n",
+    "print('✓ BokehFlow code ready')"
    ]
   },
   {
    "source": [
     "#@title Step 2: Config\n",
     "CONFIG = {\n",
+    "    # Model\n",
     "    'variant': 'nano',        # 'nano'=583K, 'small'=3.1M, 'base'=12M\n",
+    "    \n",
+    "    # Data\n",
+    "    'max_scenes': 500,        # 200=quick test(234MB), 500=good(586MB), None=all(4.5GB)\n",
+    "    'target_fstop': 2.0,\n",
+    "    'crop_size': 256,\n",
+    "    'data_dir': '/tmp/realbokeh',  # /tmp = fast SSD on Colab/Kaggle\n",
+    "    \n",
+    "    # Training\n",
     "    'batch_size': 4,          # 4 for T4, 8 for A100\n",
+    "    'num_epochs': 10,\n",
     "    'lr': 3e-4,\n",
     "    'weight_decay': 0.05,\n",
     "    'max_grad_norm': 1.0,\n",
+    "    'num_workers': 2,         # 2 for Colab, 4 for Kaggle\n",
     "    'output_dir': './checkpoints',\n",
     "}\n",
     "\n",
+    "import torch, os\n",
     "NUM_GPUS = torch.cuda.device_count()\n",
     "DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')\n",
     "print(f'Device: {DEVICE}' + (f' ({torch.cuda.get_device_name(0)})' if torch.cuda.is_available() else ''))\n",
     "if NUM_GPUS > 1:\n",
+    "    CONFIG['num_workers'] = 4\n",
+    "    CONFIG['batch_size'] = 8\n",
+    "    print(f'Multi-GPU: {NUM_GPUS} GPUs')"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 3: Smart download — only f/2.0 input+GT pairs, parallel, cached\n",
+    "import asyncio, aiohttp, json, time, random\n",
+    "from pathlib import Path\n",
+    "from concurrent.futures import ThreadPoolExecutor, as_completed\n",
+    "from tqdm.auto import tqdm\n",
+    "import nest_asyncio; nest_asyncio.apply()\n",
     "\n",
     "HF_BASE = 'https://huggingface.co/datasets/timseizinger/RealBokeh_3MP/resolve/main'\n",
+    "DATA = Path(CONFIG['data_dir'])\n",
     "\n",
+    "# --- Phase 1: Fetch metadata (3s, async) ---\n",
+    "print('Phase 1: Fetching metadata...')\n",
+    "t0 = time.time()\n",
+    "\n",
+    "async def _fetch_metas(concurrency=50):\n",
+    "    sem = asyncio.Semaphore(concurrency)\n",
+    "    conn = aiohttp.TCPConnector(limit=concurrency)\n",
+    "    async def fetch(session, i):\n",
     "        async with sem:\n",
+    "            url = f'{HF_BASE}/train/metadata/{i}.json'\n",
     "            try:\n",
     "                async with session.get(url) as r:\n",
+    "                    if r.status == 200: return await r.json(content_type=None)\n",
+    "            except: pass\n",
     "            return None\n",
+    "    async with aiohttp.ClientSession(connector=conn) as s:\n",
+    "        return await asyncio.gather(*[fetch(s, i) for i in range(1, 3961)])\n",
+    "\n",
+    "metas = [m for m in asyncio.run(_fetch_metas()) if m]\n",
+    "print(f'  {len(metas)} scenes in {time.time()-t0:.1f}s')\n",
+    "\n",
+    "# Build download list: only input + f/2.0 GT\n",
+    "# One entry per scene that has a target within 0.05 of CONFIG['target_fstop'];\n",
+    "# scenes without such a target are skipped.\n",
+    "pairs = []\n",
+    "for m in metas:\n",
+    "    gt_path = None\n",
+    "    for tp, av in zip(m['target_images'], m['target_avs']):\n",
+    "        if abs(av - CONFIG['target_fstop']) < 0.05:\n",
+    "            gt_path = tp; break\n",
+    "    if gt_path is None: continue\n",
+    "    pairs.append({\n",
+    "        'input_rel': m['source_image'],       # e.g. 'in/1_f22.JPG'\n",
+    "        'gt_rel':    gt_path,                  # e.g. 'gt/1/1_f2.0.JPG'\n",
+    "        'f_number':  CONFIG['target_fstop'],\n",
+    "        'focal_mm':  float(m.get('focal_length', 50)),           # 50 when the key is absent\n",
+    "        'focus_m':   float(m.get('focus_plane_distance', 2.0)),  # 2.0 when the key is absent\n",
+    "    })\n",
+    "# Shuffle with a fixed seed (own RNG, global `random` state untouched) so the\n",
+    "# same subset is selected on every run; an unseeded shuffle picks different\n",
+    "# scenes each time and the files already on disk would not be reused.\n",
+    "random.Random(CONFIG.get('seed', 42)).shuffle(pairs)\n",
+    "if CONFIG['max_scenes']:\n",
+    "    pairs = pairs[:CONFIG['max_scenes']]\n",
+    "print(f'  {len(pairs)} pairs selected for download')\n",
+    "\n",
+    "# --- Phase 2: Download images (parallel, with retry + skip cached) ---\n",
+    "print(f'\\nPhase 2: Downloading images to {DATA}...')\n",
+    "import requests\n",
+    "from requests.adapters import HTTPAdapter\n",
+    "from urllib3.util.retry import Retry\n",
+    "\n",
+    "def _make_session():\n",
+    "    \"\"\"Build a requests.Session whose HTTPS GETs are retried on 429/500/502/503.\n",
+    "\n",
+    "    Up to 5 retries with a backoff factor of 1.0. When the HF_TOKEN environment\n",
+    "    variable is set and non-empty, it is sent as a Bearer token on every request.\n",
+    "    \"\"\"\n",
+    "    retry_policy = Retry(\n",
+    "        total=5,\n",
+    "        backoff_factor=1.0,\n",
+    "        status_forcelist=[429, 500, 502, 503],\n",
+    "        allowed_methods=['GET'],\n",
+    "    )\n",
+    "    session = requests.Session()\n",
+    "    session.mount('https://', HTTPAdapter(max_retries=retry_policy))\n",
+    "    # Optional auth header, only added when a token is present in the environment\n",
+    "    token = os.environ.get('HF_TOKEN', '')\n",
+    "    if token:\n",
+    "        session.headers['Authorization'] = f'Bearer {token}'\n",
+    "    return session\n",
+    "\n",
+    "def _download_file(rel_path, session):\n",
+    "    \"\"\"Download one file to DATA/train/{rel_path}. Skips if exists.\"\"\"\n",
+    "    local = DATA / 'train' / rel_path\n",
+    "    if local.exists() and local.stat().st_size > 1000:\n",
+    "        return 'cached'\n",
+    "    local.parent.mkdir(parents=True, exist_ok=True)\n",
+    "    url = f'{HF_BASE}/train/{rel_path}'\n",
+    "    r = session.get(url, timeout=60)\n",
     "    r.raise_for_status()\n",
+    "    local.write_bytes(r.content)\n",
+    "    return 'downloaded'\n",
+    "\n",
+    "# Collect all files to download\n",
+    "all_files = set()\n",
+    "for p in pairs:\n",
+    "    all_files.add(p['input_rel'])\n",
+    "    all_files.add(p['gt_rel'])\n",
+    "\n",
+    "# Download with 8 threads (conservative to avoid 429)\n",
+    "t0 = time.time()\n",
+    "downloaded, cached = 0, 0\n",
+    "pbar = tqdm(total=len(all_files), desc='Downloading')\n",
+    "\n",
+    "# Use thread-local sessions to avoid connection pool issues\n",
+    "import threading\n",
+    "_local = threading.local()\n",
+    "\n",
+    "def _dl(rel_path):\n",
+    "    \"\"\"Download rel_path using the calling thread's own session (created on first use).\"\"\"\n",
+    "    try:\n",
+    "        session = _local.session\n",
+    "    except AttributeError:\n",
+    "        session = _local.session = _make_session()\n",
+    "    return _download_file(rel_path, session)\n",
+    "\n",
+    "failed = []  # (rel_path, exception) for every file that could not be fetched\n",
+    "try:\n",
+    "    with ThreadPoolExecutor(max_workers=8) as ex:\n",
+    "        futures = {ex.submit(_dl, f): f for f in all_files}\n",
+    "        for fut in as_completed(futures):\n",
+    "            try:\n",
+    "                result = fut.result()\n",
+    "            except (requests.RequestException, OSError) as err:\n",
+    "                # Keep going: one bad file should not hide the status of the rest.\n",
+    "                failed.append((futures[fut], err))\n",
+    "            else:\n",
+    "                if result == 'cached': cached += 1\n",
+    "                else: downloaded += 1\n",
+    "            pbar.update(1)\n",
+    "finally:\n",
+    "    pbar.close()  # close the progress bar even if the cell is interrupted\n",
+    "\n",
+    "if failed:\n",
+    "    first_path, first_err = failed[0]\n",
+    "    raise RuntimeError(\n",
+    "        f'{len(failed)} of {len(all_files)} files failed to download '\n",
+    "        f'(first: {first_path}: {first_err}). Re-run this cell to resume; cached files are skipped.'\n",
+    "    )\n",
+    "\n",
+    "elapsed = time.time() - t0\n",
+    "print(f'\\n✓ Done in {elapsed:.0f}s: {downloaded} downloaded, {cached} cached')\n",
+    "print(f'  Disk usage: ~{sum(f.stat().st_size for f in DATA.rglob(\"*.JPG\"))/1e6:.0f} MB')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "#@title Step 4: Dataset (reads from disk — fast, no network)\n",
+    "from torch.utils.data import Dataset, DataLoader\n",
+    "from torchvision import transforms\n",
+    "from PIL import Image\n",
+    "\n",
+    "class RealBokehDisk(Dataset):\n",
+    "    \"\"\"Reads pre-downloaded image pairs from disk. Zero network at training time.\"\"\"\n",
+    "    def __init__(self, pairs, data_dir, crop_size=256):\n",
+    "        self.pairs = pairs\n",
+    "        self.data_dir = Path(data_dir) / 'train'\n",
     "        self.crop_size = crop_size\n",
     "        self.to_tensor = transforms.ToTensor()\n",
+    "        # Verify a sample\n",
+    "        p = pairs[0]\n",
+    "        assert (self.data_dir / p['input_rel']).exists(), f\"Missing: {p['input_rel']}\"\n",
+    "        assert (self.data_dir / p['gt_rel']).exists(), f\"Missing: {p['gt_rel']}\"\n",
+    "        print(f'  Dataset: {len(pairs)} pairs, reading from disk (fast)')\n",
     "\n",
     "    def __len__(self):\n",
     "        return len(self.pairs)\n",
     "\n",
     "    def __getitem__(self, idx):\n",
     "        p = self.pairs[idx]\n",
+    "        inp = Image.open(self.data_dir / p['input_rel']).convert('RGB')\n",
+    "        gt  = Image.open(self.data_dir / p['gt_rel']).convert('RGB')\n",
     "\n",
+    "        # Synchronized random crop + flip\n",
     "        cs = self.crop_size\n",
     "        w, h = inp.size\n",
     "        if w >= cs and h >= cs:\n",
     "        return {\n",
     "            'input':  self.to_tensor(inp),\n",
     "            'target': self.to_tensor(gt),\n",
+    "            'f_number':         torch.tensor(p['f_number'], dtype=torch.float32),\n",
+    "            'focal_length_mm':  torch.tensor(p['focal_mm'], dtype=torch.float32),\n",
+    "            'focus_distance_m': torch.tensor(p['focus_m'],  dtype=torch.float32),\n",
     "        }\n",
     "\n",
+    "train_ds = RealBokehDisk(pairs, CONFIG['data_dir'], CONFIG['crop_size'])\n",
     "train_loader = DataLoader(\n",
     "    train_ds,\n",
     "    batch_size=CONFIG['batch_size'],\n",
     "    shuffle=True,\n",
     "    num_workers=CONFIG['num_workers'],\n",
+    "    pin_memory=True,\n",
     "    drop_last=True,\n",
+    "    persistent_workers=True,\n",
     ")\n",
+    "print(f'✓ DataLoader: {len(train_loader)} batches/epoch')\n",
+    "\n",
+    "# Quick sanity check\n",
     "batch = next(iter(train_loader))\n",
+    "print(f'  Batch shapes: input={batch[\"input\"].shape}, target={batch[\"target\"].shape}')"
    ]
   },
   {
    "outputs": [],
    "source": [
     "#@title Step 5: Create model\n",
+    "from bokehflow import BokehFlow, BokehFlowConfig, BokehFlowLoss\n",
     "\n",
     "config = BokehFlowConfig(variant=CONFIG['variant'])\n",
     "model = BokehFlow(config)\n",
     "if NUM_GPUS > 1:\n",
     "    model = torch.nn.DataParallel(model)\n",
     "model = model.to(DEVICE)\n",
     "\n",
+    "n_params = sum(p.numel() for p in model.parameters())\n",
+    "print(f'✓ BokehFlow-{CONFIG[\"variant\"].capitalize()}: {n_params:,} params on {DEVICE}')"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 6: Train\n",
     "optimizer = torch.optim.AdamW(model.parameters(), lr=CONFIG['lr'], weight_decay=CONFIG['weight_decay'])\n",
+    "scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=CONFIG['num_epochs']*len(train_loader))\n",
     "criterion = BokehFlowLoss(lambda_depth=0.5)\n",
     "os.makedirs(CONFIG['output_dir'], exist_ok=True)\n",
     "\n",
+    "print(f'Training: {CONFIG[\"num_epochs\"]} epochs × {len(train_loader)} batches\\n')\n",
     "\n",
     "for epoch in range(CONFIG['num_epochs']):\n",
     "    model.train()\n",
+    "    total_loss = 0.0\n",
+    "    t0 = time.time()\n",
     "    pbar = tqdm(train_loader, desc=f'Epoch {epoch+1}/{CONFIG[\"num_epochs\"]}')\n",
     "\n",
+    "    for batch in pbar:\n",
+    "        inp   = batch['input'].to(DEVICE)\n",
+    "        tgt   = batch['target'].to(DEVICE)\n",
     "        f_num = batch['f_number'].to(DEVICE)\n",
     "        focal = batch['focal_length_mm'].to(DEVICE)\n",
     "        focus = batch['focus_distance_m'].to(DEVICE)\n",
     "        optimizer.step()\n",
     "        scheduler.step()\n",
     "\n",
+    "        total_loss += loss.item()\n",
     "        pbar.set_postfix(loss=f'{loss.item():.4f}', lr=f'{scheduler.get_last_lr()[0]:.1e}')\n",
     "\n",
+    "    avg = total_loss / len(train_loader)\n",
+    "    dt = time.time() - t0\n",
+    "    print(f'  avg_loss={avg:.4f}  time={dt:.0f}s  ({dt/len(train_loader):.2f}s/batch)')\n",
     "\n",
     "    state = model.module.state_dict() if hasattr(model, 'module') else model.state_dict()\n",
     "    ckpt = f'{CONFIG[\"output_dir\"]}/bokehflow_{CONFIG[\"variant\"]}_ep{epoch+1}.pt'\n",
+    "    torch.save({'epoch': epoch+1, 'model': state, 'loss': avg}, ckpt)\n",
+    "    print(f'  ✓ {ckpt}')\n",
     "\n",
+    "print('\\n✓ Training complete!')"
    ]
   },
   {
    "metadata": {},
    "outputs": [],
    "source": [
+    "#@title Step 7: Visualize\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
     "model.eval()\n",
+    "s = train_ds[0]\n",
     "with torch.no_grad():\n",
     "    out = model(\n",
+    "        s['input'].unsqueeze(0).to(DEVICE),\n",
+    "        s['f_number'].unsqueeze(0).to(DEVICE),\n",
+    "        s['focal_length_mm'].unsqueeze(0).to(DEVICE),\n",
+    "        s['focus_distance_m'].unsqueeze(0).to(DEVICE),\n",
     "    )\n",
     "\n",
+    "fig, ax = plt.subplots(1, 3, figsize=(15, 5))\n",
+    "ax[0].imshow(s['input'].permute(1,2,0).cpu()); ax[0].set_title('Input (f/22)')\n",
+    "ax[1].imshow(out['bokeh'][0].permute(1,2,0).cpu().clamp(0,1)); ax[1].set_title('BokehFlow')\n",
+    "ax[2].imshow(s['target'].permute(1,2,0).cpu()); ax[2].set_title('GT (f/2.0)')\n",
+    "for a in ax: a.axis('off')\n",
+    "plt.tight_layout(); plt.savefig('result.png', dpi=100); plt.show()\n",
     "print('✓ Done!')"
    ]
   }
  ],
  "metadata": {
+  "kernelspec": {"display_name": "Python 3", "language": "python", "name": "python3"},
+  "language_info": {"name": "python", "version": "3.10.0"},
   "accelerator": "GPU"
  },
  "nbformat": 4,