Umair Khan
committed on
Commit
·
c4659e3
1
Parent(s):
4e13dd6
update model naming
Browse files
- app.py +11 -11
- mosaicfm-0.1.2-py3-none-any.whl +0 -0
- tahoex-0.1.2-py3-none-any.whl +0 -0
app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
# install custom package
|
| 2 |
import os
|
| 3 |
-
os.system("pip install --no-deps ./mosaicfm-0.1.2-py3-none-any.whl")
|
| 4 |
|
| 5 |
# imports
|
| 6 |
import gc
|
|
@@ -16,9 +16,9 @@ import scanpy as sc
|
|
| 16 |
from pathlib import Path
|
| 17 |
from composer import Trainer, Callback
|
| 18 |
from omegaconf import OmegaConf as om
|
| 19 |
-
from
|
| 20 |
-
from
|
| 21 |
-
from
|
| 22 |
|
| 23 |
# hardcoded configuration
|
| 24 |
EMB_KEY = "X_tx1-70m"
|
|
@@ -109,7 +109,7 @@ class GradioProgressCallback(Callback):
|
|
| 109 |
def predict_batch_end(self, state, logger):
|
| 110 |
self.seen += 1
|
| 111 |
frac = self.start + (self.end - self.start) * (self.seen / self.total)
|
| 112 |
-
self.progress(frac, desc=f"computing Tx1 embeddings ({self.seen} / {self.total} batches)
|
| 113 |
|
| 114 |
# compute embeddings
|
| 115 |
def _embed(adata_bytes, layer_name, feature_col, progress):
|
|
@@ -158,7 +158,7 @@ def _embed(adata_bytes, layer_name, feature_col, progress):
|
|
| 158 |
vocab = GeneVocab.from_file(vocab_path)
|
| 159 |
|
| 160 |
# load model
|
| 161 |
-
model =
|
| 162 |
model.load_state_dict(torch.load(ckpt)["state"]["model"], strict=strict)
|
| 163 |
model.to("cuda")
|
| 164 |
model.eval()
|
|
@@ -276,12 +276,12 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, progress=gr.Prog
|
|
| 276 |
raise gr.Error("Please select a .var column.")
|
| 277 |
|
| 278 |
# read upload file to bytes so the GPU function can load it
|
| 279 |
-
progress(0.05, desc="reading AnnData
|
| 280 |
with open(fileobj.name, "rb") as f:
|
| 281 |
adata_bytes = f.read()
|
| 282 |
|
| 283 |
# compute embeddings on GPU
|
| 284 |
-
progress(0.10, desc="computing Tx1 embeddings
|
| 285 |
E, layers, var_cols, obs_cols, adata_with_emb_bytes = _embed(
|
| 286 |
adata_bytes=adata_bytes,
|
| 287 |
layer_name=(None if layer_choice in [None, "", "<use .X>"] else layer_choice),
|
|
@@ -297,13 +297,13 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, progress=gr.Prog
|
|
| 297 |
adata = sc.read_h5ad(tmp_in, backed=None)
|
| 298 |
|
| 299 |
# compute UMAP
|
| 300 |
-
progress(0.60, desc="computing UMAP
|
| 301 |
color_series = adata.obs[obs_choice] if (obs_choice and obs_choice in adata.obs) else None
|
| 302 |
coords = _compute_umap_from_emb(E)
|
| 303 |
adata.obsm["X_umap"] = coords
|
| 304 |
|
| 305 |
# plot UMAP
|
| 306 |
-
progress(0.80, desc="plotting UMAP
|
| 307 |
import matplotlib.pyplot as plt
|
| 308 |
fig = plt.figure(figsize=(5.5, 5.0))
|
| 309 |
ax = fig.add_subplot(111)
|
|
@@ -331,7 +331,7 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, progress=gr.Prog
|
|
| 331 |
plt.close(fig)
|
| 332 |
|
| 333 |
# save other outputs and return paths
|
| 334 |
-
progress(0.95, desc="saving outputs
|
| 335 |
parquet_path, h5ad_path = _save_outputs(adata, E)
|
| 336 |
progress(1.00, desc="finished!")
|
| 337 |
return str(umap_png.resolve()), str(parquet_path.resolve()), str(h5ad_path.resolve()), ["<use .X>"] + layers, var_cols, obs_cols
|
|
|
|
| 1 |
# install custom package
|
| 2 |
import os
|
| 3 |
+
os.system("pip install --no-deps ./tahoex-0.1.2-py3-none-any.whl")
|
| 4 |
|
| 5 |
# imports
|
| 6 |
import gc
|
|
|
|
| 16 |
from pathlib import Path
|
| 17 |
from composer import Trainer, Callback
|
| 18 |
from omegaconf import OmegaConf as om
|
| 19 |
+
from tahoex.model.model import ComposerTX
|
| 20 |
+
from tahoex.data import CountDataset, DataCollator
|
| 21 |
+
from tahoex.tokenizer import GeneVocab
|
| 22 |
|
| 23 |
# hardcoded configuration
|
| 24 |
EMB_KEY = "X_tx1-70m"
|
|
|
|
| 109 |
def predict_batch_end(self, state, logger):
|
| 110 |
self.seen += 1
|
| 111 |
frac = self.start + (self.end - self.start) * (self.seen / self.total)
|
| 112 |
+
self.progress(frac, desc=f"computing Tx1 embeddings ({self.seen} / {self.total} batches)")
|
| 113 |
|
| 114 |
# compute embeddings
|
| 115 |
def _embed(adata_bytes, layer_name, feature_col, progress):
|
|
|
|
| 158 |
vocab = GeneVocab.from_file(vocab_path)
|
| 159 |
|
| 160 |
# load model
|
| 161 |
+
model = ComposerTX(model_config=model_config, collator_config=collator_config)
|
| 162 |
model.load_state_dict(torch.load(ckpt)["state"]["model"], strict=strict)
|
| 163 |
model.to("cuda")
|
| 164 |
model.eval()
|
|
|
|
| 276 |
raise gr.Error("Please select a .var column.")
|
| 277 |
|
| 278 |
# read upload file to bytes so the GPU function can load it
|
| 279 |
+
progress(0.05, desc="reading AnnData")
|
| 280 |
with open(fileobj.name, "rb") as f:
|
| 281 |
adata_bytes = f.read()
|
| 282 |
|
| 283 |
# compute embeddings on GPU
|
| 284 |
+
progress(0.10, desc="computing Tx1 embeddings")
|
| 285 |
E, layers, var_cols, obs_cols, adata_with_emb_bytes = _embed(
|
| 286 |
adata_bytes=adata_bytes,
|
| 287 |
layer_name=(None if layer_choice in [None, "", "<use .X>"] else layer_choice),
|
|
|
|
| 297 |
adata = sc.read_h5ad(tmp_in, backed=None)
|
| 298 |
|
| 299 |
# compute UMAP
|
| 300 |
+
progress(0.60, desc="computing UMAP")
|
| 301 |
color_series = adata.obs[obs_choice] if (obs_choice and obs_choice in adata.obs) else None
|
| 302 |
coords = _compute_umap_from_emb(E)
|
| 303 |
adata.obsm["X_umap"] = coords
|
| 304 |
|
| 305 |
# plot UMAP
|
| 306 |
+
progress(0.80, desc="plotting UMAP")
|
| 307 |
import matplotlib.pyplot as plt
|
| 308 |
fig = plt.figure(figsize=(5.5, 5.0))
|
| 309 |
ax = fig.add_subplot(111)
|
|
|
|
| 331 |
plt.close(fig)
|
| 332 |
|
| 333 |
# save other outputs and return paths
|
| 334 |
+
progress(0.95, desc="saving outputs")
|
| 335 |
parquet_path, h5ad_path = _save_outputs(adata, E)
|
| 336 |
progress(1.00, desc="finished!")
|
| 337 |
return str(umap_png.resolve()), str(parquet_path.resolve()), str(h5ad_path.resolve()), ["<use .X>"] + layers, var_cols, obs_cols
|
mosaicfm-0.1.2-py3-none-any.whl
DELETED
|
Binary file (38.4 kB)
|
|
|
tahoex-0.1.2-py3-none-any.whl
ADDED
|
Binary file (41.4 kB). View file
|
|
|