Marlin Lee committed on
Commit
cf75c2d
·
1 Parent(s): cdaf9dc

Sync explorer_app.py and clip_utils.py from main repo

Browse files
Files changed (1) hide show
  1. scripts/explorer_app.py +46 -5
scripts/explorer_app.py CHANGED
@@ -100,6 +100,11 @@ parser.add_argument("--clip-model", type=str, default="openai/clip-vit-large-pat
100
  parser.add_argument("--google-api-key", type=str, default=None,
101
  help="Google API key for Gemini auto-interp button "
102
  "(default: GOOGLE_API_KEY env var)")
 
 
 
 
 
103
  args = parser.parse_args()
104
 
105
 
@@ -120,7 +125,7 @@ def _get_clip():
120
 
121
  # ---------- Load all datasets into a unified list ----------
122
 
123
- def _load_dataset_dict(path, label):
124
  """Load one explorer_data.pt file and return a unified dataset dict."""
125
  print(f"Loading [{label}] from {path} ...")
126
  d = torch.load(path, map_location='cpu', weights_only=False)
@@ -203,6 +208,8 @@ def _load_dataset_dict(path, label):
203
  else:
204
  entry['patch_acts'] = None
205
 
 
 
206
  print(f" d={entry['d_model']}, n={entry['n_images']}, token={entry['token_type']}, "
207
  f"backbone={entry['backbone']}, clip={'yes' if cs is not None else 'no'}, "
208
  f"heatmaps={has_hm}, patch_acts={'yes' if entry['patch_acts'] else 'no'}")
@@ -213,14 +220,17 @@ _all_datasets = []
213
  _active = [0] # index of the currently displayed dataset
214
 
215
  # Primary dataset — always loaded eagerly
216
- _all_datasets.append(_load_dataset_dict(args.data, args.primary_label))
217
 
218
  # Compare datasets — stored as lazy placeholders; loaded on first access
219
  for _ci, _cpath in enumerate(args.compare_data):
220
  _clabel = (args.compare_labels[_ci]
221
  if args.compare_labels and _ci < len(args.compare_labels)
222
  else os.path.basename(_cpath))
223
- _all_datasets.append({'label': _clabel, 'path': _cpath, '_lazy': True})
 
 
 
224
 
225
 
226
  def _ensure_loaded(idx):
@@ -228,7 +238,7 @@ def _ensure_loaded(idx):
228
  ds = _all_datasets[idx]
229
  if ds.get('_lazy', False):
230
  print(f"[Lazy load] Loading '{ds['label']}' on first access ...")
231
- _all_datasets[idx] = _load_dataset_dict(ds['path'], ds['label'])
232
 
233
 
234
  def _apply_dataset_globals(idx):
@@ -1499,6 +1509,37 @@ def _make_summary_html():
1499
 
1500
  summary_div = Div(text=_make_summary_html(), width=700)
1501
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1502
 
1503
  # ---------- Patch Explorer ----------
1504
  # Click patches of an image to find the top active SAE features for that region.
@@ -1876,7 +1917,7 @@ patch_explorer_panel = column(
1876
  patch_feat_table,
1877
  )
1878
 
1879
- summary_section = _make_collapsible("SAE Summary", summary_div)
1880
  patch_section = _make_collapsible("Patch Explorer", patch_explorer_panel)
1881
  clip_section = _make_collapsible("CLIP Text Search", clip_search_panel)
1882
 
 
100
  parser.add_argument("--google-api-key", type=str, default=None,
101
  help="Google API key for Gemini auto-interp button "
102
  "(default: GOOGLE_API_KEY env var)")
103
+ parser.add_argument("--sae-path", type=str, default=None,
104
+ help="Path to SAE weights (.pth) for the primary dataset — "
105
+ "enables the Download SAE weights button in the summary panel")
106
+ parser.add_argument("--compare-sae-paths", type=str, nargs="*", default=[],
107
+ help="SAE weight paths for each --compare-data dataset (in order)")
108
  args = parser.parse_args()
109
 
110
 
 
125
 
126
  # ---------- Load all datasets into a unified list ----------
127
 
128
+ def _load_dataset_dict(path, label, sae_path=None):
129
  """Load one explorer_data.pt file and return a unified dataset dict."""
130
  print(f"Loading [{label}] from {path} ...")
131
  d = torch.load(path, map_location='cpu', weights_only=False)
 
208
  else:
209
  entry['patch_acts'] = None
210
 
211
+ entry['sae_path'] = sae_path
212
+
213
  print(f" d={entry['d_model']}, n={entry['n_images']}, token={entry['token_type']}, "
214
  f"backbone={entry['backbone']}, clip={'yes' if cs is not None else 'no'}, "
215
  f"heatmaps={has_hm}, patch_acts={'yes' if entry['patch_acts'] else 'no'}")
 
220
  _active = [0] # index of the currently displayed dataset
221
 
222
  # Primary dataset — always loaded eagerly
223
+ _all_datasets.append(_load_dataset_dict(args.data, args.primary_label, sae_path=args.sae_path))
224
 
225
  # Compare datasets — stored as lazy placeholders; loaded on first access
226
  for _ci, _cpath in enumerate(args.compare_data):
227
  _clabel = (args.compare_labels[_ci]
228
  if args.compare_labels and _ci < len(args.compare_labels)
229
  else os.path.basename(_cpath))
230
+ _csae = (args.compare_sae_paths[_ci]
231
+ if args.compare_sae_paths and _ci < len(args.compare_sae_paths)
232
+ else None)
233
+ _all_datasets.append({'label': _clabel, 'path': _cpath, '_lazy': True, 'sae_path': _csae})
234
 
235
 
236
  def _ensure_loaded(idx):
 
238
  ds = _all_datasets[idx]
239
  if ds.get('_lazy', False):
240
  print(f"[Lazy load] Loading '{ds['label']}' on first access ...")
241
+ _all_datasets[idx] = _load_dataset_dict(ds['path'], ds['label'], sae_path=ds.get('sae_path'))
242
 
243
 
244
  def _apply_dataset_globals(idx):
 
1509
 
1510
  summary_div = Div(text=_make_summary_html(), width=700)
1511
 
1512
+ # --- SAE weights download button ---
1513
+ _download_source = ColumnDataSource(data=dict(b64=[''], filename=['']))
1514
+ _download_source.js_on_change('data', CustomJS(args=dict(src=_download_source), code="""
1515
+ const b64 = src.data['b64'][0];
1516
+ const fname = src.data['filename'][0];
1517
+ if (!b64) return;
1518
+ const bytes = Uint8Array.from(atob(b64), c => c.charCodeAt(0));
1519
+ const blob = new Blob([bytes], {type: 'application/octet-stream'});
1520
+ const url = URL.createObjectURL(blob);
1521
+ const a = document.createElement('a');
1522
+ a.href = url; a.download = fname; a.click();
1523
+ URL.revokeObjectURL(url);
1524
+ src.data = {b64: [''], filename: ['']};
1525
+ """))
1526
+
1527
+ sae_download_btn = Button(label="\u2b07 Download SAE weights", button_type="default", width=220)
1528
+
1529
+ def _on_sae_download():
1530
+ ds = _all_datasets[_active[0]]
1531
+ sae_path = ds.get('sae_path')
1532
+ if not sae_path or not os.path.exists(sae_path):
1533
+ status_div.text = "<b style='color:red'>No SAE path set for this model. Pass --sae-path.</b>"
1534
+ return
1535
+ status_div.text = f"<b>Reading {os.path.basename(sae_path)}…</b>"
1536
+ with open(sae_path, 'rb') as f:
1537
+ b64 = base64.b64encode(f.read()).decode('ascii')
1538
+ _download_source.data = dict(b64=[b64], filename=[os.path.basename(sae_path)])
1539
+ status_div.text = ""
1540
+
1541
+ sae_download_btn.on_click(lambda: _on_sae_download())
1542
+
1543
 
1544
  # ---------- Patch Explorer ----------
1545
  # Click patches of an image to find the top active SAE features for that region.
 
1917
  patch_feat_table,
1918
  )
1919
 
1920
+ summary_section = _make_collapsible("SAE Summary", column(summary_div, sae_download_btn))
1921
  patch_section = _make_collapsible("Patch Explorer", patch_explorer_panel)
1922
  clip_section = _make_collapsible("CLIP Text Search", clip_search_panel)
1923