Spaces:

tahoebio
/

tx1-demo

Running on A10G

App Files Files Community

Umair Khan committed on about 1 month ago

Commit

b10cf8e

1 Parent(s): 0811027

revise UI

Browse files

Files changed (1) hide show

app.py +9 -10

app.py CHANGED Viewed

@@ -26,9 +26,8 @@ EMB_KEY = "X_tx1-70m"
 APP_TITLE = "Tx1-70M Embeddings"
 APP_DESC = """
 Upload an AnnData, compute Tx1-70M embeddings,
-preview a UMAP, and download the results.
-**Limits:** Files up to 5GB. If an AnnData contains more
 than 50K cells, embeddings will be computed **only
 for the first 50K cells**.
 """
@@ -126,7 +125,7 @@ def ensure_dropdowns(fileobj):
 # custom callback to report progress to Gradio
 class GradioProgressCallback(Callback):
-    def __init__(self, progress, total_batches, start=0.35, end=0.75):
         self.progress = progress
         self.total = max(1, int(total_batches))
         self.seen = 0
@@ -142,7 +141,7 @@ class GradioProgressCallback(Callback):
 def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
     # retrieve AnnData from bytes
-    progress(0.12, desc="loading AnnData")
     with tempfile.TemporaryDirectory() as td:
         # persist to a temporary file
@@ -230,7 +229,7 @@ def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
         raise gr.Error(f"Feature column '{feature_col}' does not appear to contain Ensembl gene IDs. If the column contains gene symbols, use the checkbox.")
     # load model
-    progress(0.22, desc="loading model")
     model, vocab, _, collator_config = ComposerTX.from_hf(
         "tahoebio/TahoeX1",
         "70m",
@@ -238,7 +237,7 @@ def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
     )
     # prepare AnnData
-    progress(0.30, desc="preparing AnnData")
     gene_id_key = feature_col
     adata.var["id_in_vocab"] = [vocab[gene] if gene in vocab else -1 for gene in adata.var[gene_id_key]]
     gene_ids_in_vocab = np.array(adata.var["id_in_vocab"])
@@ -252,7 +251,7 @@ def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
     gene_ids = np.array([vocab[gene] for gene in genes], dtype=int)
     # create data loader
-    progress(0.35, desc="creating data loader")
     count_matrix = _pick_layer(adata, layer_name)
     dataset = CountDataset(
         count_matrix,
@@ -349,7 +348,7 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, use_symbols, pro
         raise gr.Error("Please select a .var column.")
     # read upload file to bytes so the GPU function can load it
-    progress(0.05, desc="reading AnnData")
     with open(fileobj.name, "rb") as f:
         adata_bytes = f.read()
@@ -370,7 +369,7 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, use_symbols, pro
         adata = sc.read_h5ad(tmp_in, backed=None)
     # compute UMAP
-    progress(0.85, desc="computing UMAP")
     color_series = adata.obs[obs_choice] if (obs_choice and obs_choice in adata.obs) else None
     coords = _compute_umap_from_emb(E)
     adata.obsm["X_umap"] = coords

 APP_TITLE = "Tx1-70M Embeddings"
 APP_DESC = """
 Upload an AnnData, compute Tx1-70M embeddings,
+preview a UMAP, and download the results. **Limits:**
+Files up to 5GB. If an AnnData contains more
 than 50K cells, embeddings will be computed **only
 for the first 50K cells**.
 """
 # custom callback to report progress to Gradio
 class GradioProgressCallback(Callback):
+    def __init__(self, progress, total_batches, start=0.25, end=0.75):
         self.progress = progress
         self.total = max(1, int(total_batches))
         self.seen = 0
 def _embed(adata_bytes, layer_name, feature_col, use_symbols, progress):
     # retrieve AnnData from bytes
+    progress(0.05, desc="loading AnnData")
     with tempfile.TemporaryDirectory() as td:
         # persist to a temporary file
         raise gr.Error(f"Feature column '{feature_col}' does not appear to contain Ensembl gene IDs. If the column contains gene symbols, use the checkbox.")
     # load model
+    progress(0.15, desc="loading model")
     model, vocab, _, collator_config = ComposerTX.from_hf(
         "tahoebio/TahoeX1",
         "70m",
     )
     # prepare AnnData
+    progress(0.20, desc="preparing AnnData")
     gene_id_key = feature_col
     adata.var["id_in_vocab"] = [vocab[gene] if gene in vocab else -1 for gene in adata.var[gene_id_key]]
     gene_ids_in_vocab = np.array(adata.var["id_in_vocab"])
     gene_ids = np.array([vocab[gene] for gene in genes], dtype=int)
     # create data loader
+    progress(0.22, desc="creating data loader")
     count_matrix = _pick_layer(adata, layer_name)
     dataset = CountDataset(
         count_matrix,
         raise gr.Error("Please select a .var column.")
     # read upload file to bytes so the GPU function can load it
+    progress(0.02, desc="reading AnnData")
     with open(fileobj.name, "rb") as f:
         adata_bytes = f.read()
         adata = sc.read_h5ad(tmp_in, backed=None)
     # compute UMAP
+    progress(0.80, desc="computing UMAP")
     color_series = adata.obs[obs_choice] if (obs_choice and obs_choice in adata.obs) else None
     coords = _compute_umap_from_emb(E)
     adata.obsm["X_umap"] = coords