Umair Khan
committed on
Commit
·
b10cf8e
1
Parent(s):
0811027
revise UI
Browse files
app.py
CHANGED
|
@@ -26,9 +26,8 @@ EMB_KEY = "X_tx1-70m"
|
|
| 26 |
APP_TITLE = "Tx1-70M Embeddings"
|
| 27 |
APP_DESC = """
|
| 28 |
Upload an AnnData, compute Tx1-70M embeddings,
|
| 29 |
-
preview a UMAP, and download the results.
|
| 30 |
-
|
| 31 |
-
**Limits:** Files up to 5GB. If an AnnData contains more
|
| 32 |
than 50K cells, embeddings will be computed **only
|
| 33 |
for the first 50K cells**.
|
| 34 |
"""
|
|
@@ -126,7 +125,7 @@ def ensure_dropdowns(fileobj):
|
|
| 126 |
|
| 127 |
# custom callback to report progress to Gradio
|
| 128 |
class GradioProgressCallback(Callback):
|
| 129 |
-
def __init__(self, progress, total_batches, start=0.
|
| 130 |
self.progress = progress
|
| 131 |
self.total = max(1, int(total_batches))
|
| 132 |
self.seen = 0
|
|
@@ -142,7 +141,7 @@ class GradioProgressCallback(Callback):
|
|
| 142 |
def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
|
| 143 |
|
| 144 |
# retrieve AnnData from bytes
|
| 145 |
-
progress(0.
|
| 146 |
with tempfile.TemporaryDirectory() as td:
|
| 147 |
|
| 148 |
# persist to a temporary file
|
|
@@ -230,7 +229,7 @@ def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
|
|
| 230 |
raise gr.Error(f"Feature column '{feature_col}' does not appear to contain Ensembl gene IDs. If the column contains gene symbols, use the checkbox.")
|
| 231 |
|
| 232 |
# load model
|
| 233 |
-
progress(0.
|
| 234 |
model, vocab, _, collator_config = ComposerTX.from_hf(
|
| 235 |
"tahoebio/TahoeX1",
|
| 236 |
"70m",
|
|
@@ -238,7 +237,7 @@ def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
|
|
| 238 |
)
|
| 239 |
|
| 240 |
# prepare AnnData
|
| 241 |
-
progress(0.
|
| 242 |
gene_id_key = feature_col
|
| 243 |
adata.var["id_in_vocab"] = [vocab[gene] if gene in vocab else -1 for gene in adata.var[gene_id_key]]
|
| 244 |
gene_ids_in_vocab = np.array(adata.var["id_in_vocab"])
|
|
@@ -252,7 +251,7 @@ def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
|
|
| 252 |
gene_ids = np.array([vocab[gene] for gene in genes], dtype=int)
|
| 253 |
|
| 254 |
# create data loader
|
| 255 |
-
progress(0.
|
| 256 |
count_matrix = _pick_layer(adata, layer_name)
|
| 257 |
dataset = CountDataset(
|
| 258 |
count_matrix,
|
|
@@ -349,7 +348,7 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, use_symbols, pro
|
|
| 349 |
raise gr.Error("Please select a .var column.")
|
| 350 |
|
| 351 |
# read upload file to bytes so the GPU function can load it
|
| 352 |
-
progress(0.
|
| 353 |
with open(fileobj.name, "rb") as f:
|
| 354 |
adata_bytes = f.read()
|
| 355 |
|
|
@@ -370,7 +369,7 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, use_symbols, pro
|
|
| 370 |
adata = sc.read_h5ad(tmp_in, backed=None)
|
| 371 |
|
| 372 |
# compute UMAP
|
| 373 |
-
progress(0.
|
| 374 |
color_series = adata.obs[obs_choice] if (obs_choice and obs_choice in adata.obs) else None
|
| 375 |
coords = _compute_umap_from_emb(E)
|
| 376 |
adata.obsm["X_umap"] = coords
|
|
|
|
| 26 |
APP_TITLE = "Tx1-70M Embeddings"
|
| 27 |
APP_DESC = """
|
| 28 |
Upload an AnnData, compute Tx1-70M embeddings,
|
| 29 |
+
preview a UMAP, and download the results. **Limits:**
|
| 30 |
+
Files up to 5GB. If an AnnData contains more
|
|
|
|
| 31 |
than 50K cells, embeddings will be computed **only
|
| 32 |
for the first 50K cells**.
|
| 33 |
"""
|
|
|
|
| 125 |
|
| 126 |
# custom callback to report progress to Gradio
|
| 127 |
class GradioProgressCallback(Callback):
|
| 128 |
+
def __init__(self, progress, total_batches, start=0.25, end=0.75):
|
| 129 |
self.progress = progress
|
| 130 |
self.total = max(1, int(total_batches))
|
| 131 |
self.seen = 0
|
|
|
|
| 141 |
def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
|
| 142 |
|
| 143 |
# retrieve AnnData from bytes
|
| 144 |
+
progress(0.05, desc="loading AnnData")
|
| 145 |
with tempfile.TemporaryDirectory() as td:
|
| 146 |
|
| 147 |
# persist to a temporary file
|
|
|
|
| 229 |
raise gr.Error(f"Feature column '{feature_col}' does not appear to contain Ensembl gene IDs. If the column contains gene symbols, use the checkbox.")
|
| 230 |
|
| 231 |
# load model
|
| 232 |
+
progress(0.15, desc="loading model")
|
| 233 |
model, vocab, _, collator_config = ComposerTX.from_hf(
|
| 234 |
"tahoebio/TahoeX1",
|
| 235 |
"70m",
|
|
|
|
| 237 |
)
|
| 238 |
|
| 239 |
# prepare AnnData
|
| 240 |
+
progress(0.20, desc="preparing AnnData")
|
| 241 |
gene_id_key = feature_col
|
| 242 |
adata.var["id_in_vocab"] = [vocab[gene] if gene in vocab else -1 for gene in adata.var[gene_id_key]]
|
| 243 |
gene_ids_in_vocab = np.array(adata.var["id_in_vocab"])
|
|
|
|
| 251 |
gene_ids = np.array([vocab[gene] for gene in genes], dtype=int)
|
| 252 |
|
| 253 |
# create data loader
|
| 254 |
+
progress(0.22, desc="creating data loader")
|
| 255 |
count_matrix = _pick_layer(adata, layer_name)
|
| 256 |
dataset = CountDataset(
|
| 257 |
count_matrix,
|
|
|
|
| 348 |
raise gr.Error("Please select a .var column.")
|
| 349 |
|
| 350 |
# read upload file to bytes so the GPU function can load it
|
| 351 |
+
progress(0.02, desc="reading AnnData")
|
| 352 |
with open(fileobj.name, "rb") as f:
|
| 353 |
adata_bytes = f.read()
|
| 354 |
|
|
|
|
| 369 |
adata = sc.read_h5ad(tmp_in, backed=None)
|
| 370 |
|
| 371 |
# compute UMAP
|
| 372 |
+
progress(0.80, desc="computing UMAP")
|
| 373 |
color_series = adata.obs[obs_choice] if (obs_choice and obs_choice in adata.obs) else None
|
| 374 |
coords = _compute_umap_from_emb(E)
|
| 375 |
adata.obsm["X_umap"] = coords
|