mckell committed on
Commit
58e5bac
·
verified ·
1 Parent(s): 95cd384

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +30 -5
  2. requirements.txt +1 -1
app.py CHANGED
@@ -15,8 +15,23 @@ Environment variables:
15
  """
16
 
17
  import os
 
 
18
  from pathlib import Path
19
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  import spaces
21
 
22
  # Data source configuration
@@ -83,6 +98,14 @@ def download_checkpoint(output_dir: Path, model: str) -> None:
83
  print(" Generation will be disabled without checkpoint")
84
 
85
 
 
 
 
 
 
 
 
 
86
  def regenerate_umap(data_dir: Path, model: str) -> bool:
87
  """Regenerate UMAP pickle for a model to ensure numba compatibility.
88
 
@@ -122,24 +145,26 @@ def regenerate_umap(data_dir: Path, model: str) -> bool:
122
  with open(json_path, "r") as f:
123
  umap_params = json.load(f)
124
 
 
125
  print(f" Regenerating UMAP for {model}...")
126
- print(f" Params: n_neighbors={umap_params.get('n_neighbors', 15)}, min_dist={umap_params.get('min_dist', 0.1)}")
127
 
128
  try:
129
  # Load activations
130
  activations, metadata_df = load_dataset_activations(activation_dir, metadata_path)
131
- print(f" Loaded {activations.shape[0]} activations")
132
 
133
- # Compute UMAP
134
- embeddings, reducer, scaler = compute_umap(
135
  activations,
136
  n_neighbors=umap_params.get("n_neighbors", 15),
137
  min_dist=umap_params.get("min_dist", 0.1),
138
  normalize=True,
 
139
  )
140
 
141
  # Save (overwrites existing pickle with compatible version)
142
- save_embeddings(embeddings, metadata_df, csv_path, umap_params, reducer, scaler)
143
  print(f" UMAP pickle regenerated: {pkl_path}")
144
  return True
145
 
 
15
  """
16
 
17
  import os
18
+ import subprocess
19
+ import sys
20
  from pathlib import Path
21
 
22
+ # Install diffviews from git to bypass pip cache issues
23
+ _REPO_URL = "https://github.com/mckellcarter/diffviews.git"
24
+ _REPO_BRANCH = os.environ.get("DIFFVIEWS_BRANCH", "feature/modal-transition")
25
+ _REPO_DIR = "/tmp/diffviews"
26
+
27
+ if not os.path.exists(_REPO_DIR):
28
+ print(f"Cloning diffviews from {_REPO_BRANCH}...")
29
+ subprocess.run(
30
+ ["git", "clone", "--depth=1", "-b", _REPO_BRANCH, _REPO_URL, _REPO_DIR],
31
+ check=True,
32
+ )
33
+ sys.path.insert(0, _REPO_DIR)
34
+
35
  import spaces
36
 
37
  # Data source configuration
 
98
  print(" Generation will be disabled without checkpoint")
99
 
100
 
101
+ def get_pca_components() -> int | None:
102
+ """Read PCA pre-reduction setting from env. None = disabled."""
103
+ val = os.environ.get("DIFFVIEWS_PCA_COMPONENTS", "50")
104
+ if val.lower() in ("0", "none", "off", ""):
105
+ return None
106
+ return int(val)
107
+
108
+
109
  def regenerate_umap(data_dir: Path, model: str) -> bool:
110
  """Regenerate UMAP pickle for a model to ensure numba compatibility.
111
 
 
145
  with open(json_path, "r") as f:
146
  umap_params = json.load(f)
147
 
148
+ pca_components = get_pca_components()
149
  print(f" Regenerating UMAP for {model}...")
150
+ print(f" Params: n_neighbors={umap_params.get('n_neighbors', 15)}, min_dist={umap_params.get('min_dist', 0.1)}, pca={pca_components}")
151
 
152
  try:
153
  # Load activations
154
  activations, metadata_df = load_dataset_activations(activation_dir, metadata_path)
155
+ print(f" Loaded {activations.shape[0]} activations ({activations.shape[1]} dims)")
156
 
157
+ # Compute UMAP (with optional PCA pre-reduction)
158
+ embeddings, reducer, scaler, pca_reducer = compute_umap(
159
  activations,
160
  n_neighbors=umap_params.get("n_neighbors", 15),
161
  min_dist=umap_params.get("min_dist", 0.1),
162
  normalize=True,
163
+ pca_components=pca_components,
164
  )
165
 
166
  # Save (overwrites existing pickle with compatible version)
167
+ save_embeddings(embeddings, metadata_df, csv_path, umap_params, reducer, scaler, pca_reducer)
168
  print(f" UMAP pickle regenerated: {pkl_path}")
169
  return True
170
 
requirements.txt CHANGED
@@ -1,6 +1,6 @@
1
  # DiffViews - HuggingFace Spaces Requirements
2
  # Requires Python 3.10+
3
- git+https://github.com/mckellcarter/diffviews.git@920ef67
4
 
5
  # Core dependencies
6
  torch>=2.0.0
 
1
  # DiffViews - HuggingFace Spaces Requirements
2
  # Requires Python 3.10+
3
+ # Package installed via git clone in app.py (bypasses pip cache)
4
 
5
  # Core dependencies
6
  torch>=2.0.0