Umair Khan committed on
Commit
c4659e3
·
1 Parent(s): 4e13dd6

update model naming

Browse files
app.py CHANGED
@@ -1,6 +1,6 @@
1
  # install custom package
2
  import os
3
- os.system("pip install --no-deps ./mosaicfm-0.1.2-py3-none-any.whl")
4
 
5
  # imports
6
  import gc
@@ -16,9 +16,9 @@ import scanpy as sc
16
  from pathlib import Path
17
  from composer import Trainer, Callback
18
  from omegaconf import OmegaConf as om
19
- from mosaicfm.model.model import ComposerSCGPTModel
20
- from mosaicfm.data import CountDataset, DataCollator
21
- from mosaicfm.tokenizer import GeneVocab
22
 
23
  # hardcoded configuration
24
  EMB_KEY = "X_tx1-70m"
@@ -109,7 +109,7 @@ class GradioProgressCallback(Callback):
109
  def predict_batch_end(self, state, logger):
110
  self.seen += 1
111
  frac = self.start + (self.end - self.start) * (self.seen / self.total)
112
- self.progress(frac, desc=f"computing Tx1 embeddings ({self.seen} / {self.total} batches)...")
113
 
114
  # compute embeddings
115
  def _embed(adata_bytes, layer_name, feature_col, progress):
@@ -158,7 +158,7 @@ def _embed(adata_bytes, layer_name, feature_col, progress):
158
  vocab = GeneVocab.from_file(vocab_path)
159
 
160
  # load model
161
- model = ComposerSCGPTModel(model_config=model_config, collator_config=collator_config)
162
  model.load_state_dict(torch.load(ckpt)["state"]["model"], strict=strict)
163
  model.to("cuda")
164
  model.eval()
@@ -276,12 +276,12 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, progress=gr.Prog
276
  raise gr.Error("Please select a .var column.")
277
 
278
  # read upload file to bytes so the GPU function can load it
279
- progress(0.05, desc="reading AnnData...")
280
  with open(fileobj.name, "rb") as f:
281
  adata_bytes = f.read()
282
 
283
  # compute embeddings on GPU
284
- progress(0.10, desc="computing Tx1 embeddings...")
285
  E, layers, var_cols, obs_cols, adata_with_emb_bytes = _embed(
286
  adata_bytes=adata_bytes,
287
  layer_name=(None if layer_choice in [None, "", "<use .X>"] else layer_choice),
@@ -297,13 +297,13 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, progress=gr.Prog
297
  adata = sc.read_h5ad(tmp_in, backed=None)
298
 
299
  # compute UMAP
300
- progress(0.60, desc="computing UMAP...")
301
  color_series = adata.obs[obs_choice] if (obs_choice and obs_choice in adata.obs) else None
302
  coords = _compute_umap_from_emb(E)
303
  adata.obsm["X_umap"] = coords
304
 
305
  # plot UMAP
306
- progress(0.80, desc="plotting UMAP...")
307
  import matplotlib.pyplot as plt
308
  fig = plt.figure(figsize=(5.5, 5.0))
309
  ax = fig.add_subplot(111)
@@ -331,7 +331,7 @@ def run_pipeline(fileobj, layer_choice, var_choice, obs_choice, progress=gr.Prog
331
  plt.close(fig)
332
 
333
  # save other outputs and return paths
334
- progress(0.95, desc="saving outputs...")
335
  parquet_path, h5ad_path = _save_outputs(adata, E)
336
  progress(1.00, desc="finished!")
337
  return str(umap_png.resolve()), str(parquet_path.resolve()), str(h5ad_path.resolve()), ["<use .X>"] + layers, var_cols, obs_cols
 
1
  # install custom package
2
  import os
3
+ os.system("pip install --no-deps ./tahoex-0.1.2-py3-none-any.whl")
4
 
5
  # imports
6
  import gc
 
16
  from pathlib import Path
17
  from composer import Trainer, Callback
18
  from omegaconf import OmegaConf as om
19
+ from tahoex.model.model import ComposerTX
20
+ from tahoex.data import CountDataset, DataCollator
21
+ from tahoex.tokenizer import GeneVocab
22
 
23
  # hardcoded configuration
24
  EMB_KEY = "X_tx1-70m"
 
109
  def predict_batch_end(self, state, logger):
110
  self.seen += 1
111
  frac = self.start + (self.end - self.start) * (self.seen / self.total)
112
+ self.progress(frac, desc=f"computing Tx1 embeddings ({self.seen} / {self.total} batches)")
113
 
114
  # compute embeddings
115
  def _embed(adata_bytes, layer_name, feature_col, progress):
 
158
  vocab = GeneVocab.from_file(vocab_path)
159
 
160
  # load model
161
+ model = ComposerTX(model_config=model_config, collator_config=collator_config)
162
  model.load_state_dict(torch.load(ckpt)["state"]["model"], strict=strict)
163
  model.to("cuda")
164
  model.eval()
 
276
  raise gr.Error("Please select a .var column.")
277
 
278
  # read upload file to bytes so the GPU function can load it
279
+ progress(0.05, desc="reading AnnData")
280
  with open(fileobj.name, "rb") as f:
281
  adata_bytes = f.read()
282
 
283
  # compute embeddings on GPU
284
+ progress(0.10, desc="computing Tx1 embeddings")
285
  E, layers, var_cols, obs_cols, adata_with_emb_bytes = _embed(
286
  adata_bytes=adata_bytes,
287
  layer_name=(None if layer_choice in [None, "", "<use .X>"] else layer_choice),
 
297
  adata = sc.read_h5ad(tmp_in, backed=None)
298
 
299
  # compute UMAP
300
+ progress(0.60, desc="computing UMAP")
301
  color_series = adata.obs[obs_choice] if (obs_choice and obs_choice in adata.obs) else None
302
  coords = _compute_umap_from_emb(E)
303
  adata.obsm["X_umap"] = coords
304
 
305
  # plot UMAP
306
+ progress(0.80, desc="plotting UMAP")
307
  import matplotlib.pyplot as plt
308
  fig = plt.figure(figsize=(5.5, 5.0))
309
  ax = fig.add_subplot(111)
 
331
  plt.close(fig)
332
 
333
  # save other outputs and return paths
334
+ progress(0.95, desc="saving outputs")
335
  parquet_path, h5ad_path = _save_outputs(adata, E)
336
  progress(1.00, desc="finished!")
337
  return str(umap_png.resolve()), str(parquet_path.resolve()), str(h5ad_path.resolve()), ["<use .X>"] + layers, var_cols, obs_cols
mosaicfm-0.1.2-py3-none-any.whl DELETED
Binary file (38.4 kB)
 
tahoex-0.1.2-py3-none-any.whl ADDED
Binary file (41.4 kB). View file