RobroKools commited on
Commit
e59f78e
·
verified ·
1 Parent(s): 06ac391

Upload 44 files

Browse files
Files changed (44) hide show
  1. app.py +99 -0
  2. celldreamer/__init__.py +0 -0
  3. celldreamer/__pycache__/__init__.cpython-310.pyc +0 -0
  4. celldreamer/__pycache__/__init__.cpython-313.pyc +0 -0
  5. celldreamer/checkpoints/best.pth +3 -0
  6. celldreamer/checkpoints/last.pth +3 -0
  7. celldreamer/config/evaluate_config.yml +29 -0
  8. celldreamer/config/train_config.yml +30 -0
  9. celldreamer/data/__init__.py +133 -0
  10. celldreamer/data/__pycache__/__init__.cpython-310.pyc +0 -0
  11. celldreamer/data/__pycache__/class_celldreamerDataset.cpython-310.pyc +0 -0
  12. celldreamer/data/__pycache__/download.cpython-310.pyc +0 -0
  13. celldreamer/data/__pycache__/plots.cpython-310.pyc +0 -0
  14. celldreamer/data/__pycache__/process.cpython-310.pyc +0 -0
  15. celldreamer/data/class_celldreamerDataset.py +48 -0
  16. celldreamer/data/download.py +17 -0
  17. celldreamer/data/plots.py +33 -0
  18. celldreamer/data/process.py +59 -0
  19. celldreamer/data/stats/stats.pt +3 -0
  20. celldreamer/environments/environment_cpu.yml +25 -0
  21. celldreamer/environments/environment_gpu.yml +29 -0
  22. celldreamer/logs/CellDreamer_V1_Panc8_20260124-172947/events.out.tfevents.1769304587.wifi-10-45-214-157.wifi.berkeley.edu.83075.0 +3 -0
  23. celldreamer/logs/CellDreamer_V1_Panc8_20260124-173010/events.out.tfevents.1769304610.wifi-10-45-214-157.wifi.berkeley.edu.83336.0 +3 -0
  24. celldreamer/logs/CellDreamer_V1_Panc8_20260125-131802/events.out.tfevents.1769375882.wifi-10-45-214-157.wifi.berkeley.edu.13242.0 +3 -0
  25. celldreamer/models/__init__.py +10 -0
  26. celldreamer/models/__pycache__/__init__.cpython-310.pyc +0 -0
  27. celldreamer/models/__pycache__/__init__.cpython-313.pyc +0 -0
  28. celldreamer/models/__pycache__/class_celldreamer.cpython-310.pyc +0 -0
  29. celldreamer/models/__pycache__/evaluate.cpython-310.pyc +0 -0
  30. celldreamer/models/__pycache__/least_squares_umap.cpython-310.pyc +0 -0
  31. celldreamer/models/__pycache__/networks.cpython-310.pyc +0 -0
  32. celldreamer/models/__pycache__/train.cpython-310.pyc +0 -0
  33. celldreamer/models/class_celldreamer.py +94 -0
  34. celldreamer/models/evaluate.py +145 -0
  35. celldreamer/models/least_squares_umap.py +56 -0
  36. celldreamer/models/networks.py +162 -0
  37. celldreamer/models/train.py +170 -0
  38. celldreamer/results/latent_umap.png +0 -0
  39. celldreamer/results/test_metrics.json +11 -0
  40. celldreamer/scripts/data.sh +3 -0
  41. celldreamer/scripts/evaluate.sh +3 -0
  42. celldreamer/scripts/train.sh +5 -0
  43. master.ipynb +241 -0
  44. requirements.txt +8 -0
app.py ADDED
@@ -0,0 +1,99 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ import sys
4
+ import os
5
+
6
+ sys.path.append(os.getcwd())
7
+ from celldreamer.models.class_celldreamer import ClassCellDreamer
8
+ from celldreamer.models import load_config
9
+
10
# Paths to the model configuration, checkpoint, and normalization stats.
CONFIG_PATH = "celldreamer/config/evaluate_config.yml"
CHECKPOINT_PATH = "celldreamer/checkpoints/best.pth"
STATS_PATH = "celldreamer/data/stats/stats.pt"
RNN_DIM = 32  # must match rnn_dim in the training/evaluation config

# Pre-declare module state so a failed initialization leaves well-defined
# values. Fix: predict_api checks `model_wrapper is None`, but previously a
# failure inside the try-block left the name undefined entirely, so the
# check raised NameError instead of returning the intended error payload.
model_wrapper = None
args = None
train_mean = None
train_std = None
STATS_LOADED = False

try:
    args = load_config(CONFIG_PATH)
    args.device = "cpu"  # CPU-only inference for the hosted demo

    model_wrapper = ClassCellDreamer(args)
    state_dict = torch.load(CHECKPOINT_PATH, map_location=torch.device('cpu'))
    model_wrapper.model.load_state_dict(state_dict)
    model_wrapper.model.eval()
    model_wrapper.model.encoder.eval()
    model_wrapper.model.decoder.eval()
    print("Model loaded successfully.")

    stats = torch.load(STATS_PATH, map_location="cpu")
    train_mean = stats["mean"].view(1, -1)
    train_std = stats["std"].view(1, -1)
    STATS_LOADED = True
    print("Normalization stats loaded.")

except Exception as e:
    print(f"Critical Error during initialization: {e}")
    model_wrapper = None
    STATS_LOADED = False
37
+
38
def normalize_input(x_raw):
    """Apply the training-time preprocessing to a raw expression tensor.

    log1p-transform, z-score with the stored training statistics when they
    were loaded, then clamp at the training clip value of 10.0.
    """
    logged = torch.log1p(x_raw)
    scaled = (logged - train_mean) / train_std if STATS_LOADED else logged
    return torch.clamp(scaled, max=10.0)
47
+
48
def predict_api(input_data):
    """Gradio endpoint: roll the latent dynamics forward from a gene vector.

    input_data: JSON dict with
        genes: list[float] of length args.num_genes (raw counts)
        steps: optional int, number of rollout steps (default 10)

    Returns {"status": "success", "trajectory": [...]} where trajectory is
    the list of latent vectors visited, or {"error": ...} on any failure.
    """
    # Validation — initialization may have failed at import time.
    if model_wrapper is None:
        return {"error": "Model not loaded"}

    try:
        genes = input_data.get("genes")
        steps = int(input_data.get("steps", 10))  # coerce JSON numbers/strings

        x_t = torch.tensor(genes, dtype=torch.float32)
        if x_t.dim() == 1:
            x_t = x_t.unsqueeze(0)

        if x_t.shape[1] != args.num_genes:
            return {"error": f"Gene count mismatch. Expected {args.num_genes}, got {x_t.shape[1]}"}

        x_norm = normalize_input(x_t)

        with torch.no_grad():
            z_mean, z_std = model_wrapper.model.encoder(x_norm)

            z_current = z_mean
            hidden_state = torch.zeros(z_current.size(0), RNN_DIM)

            trajectory = []
            for _ in range(steps):
                trajectory.append(z_current[0].tolist())
                # Fix: carry the recurrent state forward. The original
                # discarded the returned hidden state and passed the
                # all-zero tensor on every step, so the RSSM memory was
                # never used during the rollout.
                hidden_state, velocity_mean, velocity_std = model_wrapper.model.rssm(
                    z_current, hidden_state
                )
                z_current = z_current + velocity_mean

        return {
            "status": "success",
            "trajectory": trajectory
        }

    except Exception as e:
        return {"error": str(e)}
89
+
90
+
91
# JSON-in / JSON-out Gradio interface exposing the latent rollout endpoint.
demo = gr.Interface(
    fn=predict_api,
    inputs=gr.JSON(label="Input Gene Vector"),
    outputs=gr.JSON(label="Output"),
    title="CellDreamer API"
)

if __name__ == "__main__":
    demo.launch()
celldreamer/__init__.py ADDED
File without changes
celldreamer/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (176 Bytes). View file
 
celldreamer/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (194 Bytes). View file
 
celldreamer/checkpoints/best.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea01e526ec38112a805fe698dfd7f41073a9644bb3db2c369da4ff941c669532
3
+ size 5453065
celldreamer/checkpoints/last.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76bd8aa65b1a7b9193217bc2475b1979e85c72f8ff5bd11d18d477db77baac98
3
+ size 5453065
celldreamer/config/evaluate_config.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: "Eval_CellDreamer_V1"
2
+ model_type: "celldreamer"
3
+ device: "mps"
4
+
5
+
6
+ data_path: "celldreamer/data/datasets"
7
+ checkpoint_path: "celldreamer/checkpoints/best.pth"
8
+ output_dir: "celldreamer/results"
9
+ output_filename: "test_metrics.json"
10
+
11
+ batch_size: 128
12
+ kl_scale: 0.01 # updated to match train_config to prevent posterior collapse
13
+
14
+
15
+ # MUST BE SAME AS TRAINIG CONFIG
16
+ num_genes: 2446
17
+ latent_dim: 50
18
+ rnn_dim: 32
19
+ learning_rate: 25e-6
20
+
21
+ enc_hidden_dims:
22
+ - 256
23
+ - 128
24
+
25
+ dec_hidden_dims:
26
+ - 128
27
+ - 256
28
+
29
+ weight_decay: 1e-3
celldreamer/config/train_config.yml ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ run_name: "CellDreamer_V1_Panc8"
2
+ model_type: "celldreamer"
3
+ device: "cuda"
4
+
5
+ data_path: "celldreamer/data/datasets"
6
+ save_dir: "celldreamer/checkpoints"
7
+ log_dir: "celldreamer/logs"
8
+
9
+ epochs: 30
10
+ batch_size: 128 # dreamer uses higher batch sizes to reduce noise from affecting learning
11
+ learning_rate: 25e-6
12
+ log_interval: 10
13
+ save_freq: 10
14
+
15
+ num_genes: 2446
16
+ latent_dim: 50 # z (embedding)
17
+ rnn_dim: 32 # h (memory)
18
+
19
+ # [Input -> 256 -> 128 -> Latent]
20
+ enc_hidden_dims:
21
+ - 256
22
+ - 128
23
+
24
+ # [Latent+RNN -> 128 -> 256 -> Output]
25
+ dec_hidden_dims:
26
+ - 128
27
+ - 256
28
+
29
+ weight_decay: 1e-3
30
+ kl_scale: 0.01 # increased from 0.00001 to prevent posterior collapse. Lower = more dream, higher = more physics emphasis
celldreamer/data/__init__.py ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import os
3
+ import scanpy as sc
4
+ import numpy as np
5
+ import json
6
+ import scipy
7
+
8
+ from celldreamer.data.download import collect_data
9
+ from celldreamer.data.process import process
10
+ from celldreamer.data.plots import validate
11
+ from celldreamer.data.class_celldreamerDataset import CellDreamerDataset
12
+
13
def create_data():
    """Run the full data pipeline and serialize the three dataset splits.

    Downloads the raw data, processes it into pseudotime pairs, renders the
    validation plot, then wraps each split in a CellDreamerDataset and saves
    it under celldreamer/data/datasets/.
    """
    collect_data()
    process()
    validate()

    splits = {
        "train": "celldreamer/data/processed/train_pairs.npy",
        "val": "celldreamer/data/processed/val_pairs.npy",
        "test": "celldreamer/data/processed/test_pairs.npy",
    }

    os.makedirs("celldreamer/data/datasets", exist_ok=True)
    for split_name, pairs_path in splits.items():
        dataset = CellDreamerDataset(pairs_path=pairs_path)
        torch.save(dataset, f"celldreamer/data/datasets/{split_name}.pt")
26
+
27
+
28
def get_data_stats(n_background_points=5000):
    """Compute normalization statistics and export front-end artifacts.

    Saves per-gene mean/std (preferring raw counts when available) to
    celldreamer/data/stats/stats.pt, plus three JSON artifacts for the
    React application: a gene-name index map, a subsampled UMAP background
    scatter, and a default "stem" (ductal) expression vector.

    n_background_points: max number of cells in the background scatter.
    """
    data_path = "celldreamer/data/processed/cleaned.h5ad"
    adata = sc.read(data_path)

    # Prefer raw counts when present; fall back to the processed matrix.
    # (The two branches of the original computed identical mean/std — merged.)
    if adata.raw is not None:
        X_source = adata.raw[:, adata.var_names].X
    else:
        X_source = adata.X
    if scipy.sparse.issparse(X_source):
        X_source = X_source.toarray()

    mean = np.mean(X_source, axis=0)
    std = np.std(X_source, axis=0)
    std[std == 0] = 1.0  # avoid division by zero for constant genes

    stats = {
        "mean": torch.tensor(mean),
        "std": torch.tensor(std)
    }
    os.makedirs("celldreamer/data/stats", exist_ok=True)
    torch.save(stats, "celldreamer/data/stats/stats.pt")

    # Artifacts consumed by the React application.
    output_dir = "celldreamer/data/artifacts"
    os.makedirs(output_dir, exist_ok=True)

    # Gene name list (dropdown) and name -> column index map (perturbation).
    gene_names = adata.var_names.tolist()
    gene_indices = {name: i for i, name in enumerate(gene_names)}
    gene_map_payload = {
        "gene_names": gene_names,
        "indices": gene_indices
    }

    with open(f"{output_dir}/gene_map.json", "w") as f:
        json.dump(gene_map_payload, f)

    # Random subset of UMAP coordinates for the cell-type cluster background.
    if 'X_umap' not in adata.obsm:
        if 'neighbors' not in adata.uns:
            sc.pp.neighbors(adata)
        sc.tl.umap(adata)

    total_cells = adata.shape[0]
    if total_cells > n_background_points:
        indices = np.random.choice(total_cells, n_background_points, replace=False)
        indices.sort()
    else:
        indices = np.arange(total_cells)

    umap_coords = adata.obsm['X_umap']
    background_payload = []
    has_celltype = 'celltype' in adata.obs

    for idx in indices:
        idx = int(idx)

        point = {
            "id": idx,
            "x": round(float(umap_coords[idx, 0]), 3),
            "y": round(float(umap_coords[idx, 1]), 3),
            "t": round(float(adata.obs['dpt_pseudotime'].iloc[idx]), 3)
        }

        if has_celltype:
            point["label"] = str(adata.obs['celltype'].iloc[idx])

        background_payload.append(point)

    with open(f"{output_dir}/background_map.json", "w") as f:
        json.dump(background_payload, f)

    # Mean ductal-cell expression as the default perturbation starting point.
    # Fix: guard the 'celltype' lookup — the background loop above already
    # treats the column as optional, but the original accessed it
    # unconditionally here and would raise KeyError when it is absent.
    if has_celltype:
        stem_mask = adata.obs['celltype'].str.contains('ductal', case=False)
    else:
        stem_mask = None

    if stem_mask is None or stem_mask.sum() == 0:
        stem_data = adata.X
    else:
        stem_data = adata.X[stem_mask]

    if scipy.sparse.issparse(stem_data):
        mean_stem_z_score = stem_data.mean(axis=0).A1
    else:
        mean_stem_z_score = stem_data.mean(axis=0)

    # Un-scale so the UI gets raw-count-like numbers (not z-scores like -1.7).
    # NOTE(review): assumes adata.X is z-scored with these same stats —
    # confirm against the processing pipeline.
    usable_stem_vector = (mean_stem_z_score * std) + mean
    usable_stem_vector = np.maximum(usable_stem_vector, 0.0)

    with open(f"{output_dir}/default_stem_cell.json", "w") as f:
        json.dump(usable_stem_vector.tolist(), f)


if __name__ == "__main__":
    create_data()
    get_data_stats()
celldreamer/data/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (3.33 kB). View file
 
celldreamer/data/__pycache__/class_celldreamerDataset.cpython-310.pyc ADDED
Binary file (1.66 kB). View file
 
celldreamer/data/__pycache__/download.cpython-310.pyc ADDED
Binary file (693 Bytes). View file
 
celldreamer/data/__pycache__/plots.cpython-310.pyc ADDED
Binary file (1.19 kB). View file
 
celldreamer/data/__pycache__/process.cpython-310.pyc ADDED
Binary file (2.01 kB). View file
 
celldreamer/data/class_celldreamerDataset.py ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import Dataset
3
+ import scanpy as sc
4
+ import numpy as np
5
+ import scipy.sparse
6
+
7
class CellDreamerDataset(Dataset):
    """Pairs of (cell, later-cell) expression vectors ordered by pseudotime.

    Each item is a dict with the current state, the next state, their
    difference, and the pseudotime gap between them.
    """

    def __init__(
        self,
        data_path="celldreamer/data/processed/cleaned.h5ad",
        pairs_path="celldreamer/data/processed/train_pairs.npy",
        normalize=False
    ):
        adata = sc.read(data_path)

        # Sanity print of the value range before any optional normalization.
        data_min = adata.X.min()
        data_max = adata.X.max()
        print(f"min: {data_min:.4f}, max: {data_max:.4f}")

        if normalize:
            sc.pp.normalize_total(adata, target_sum=1e4)
            sc.pp.log1p(adata)

        self.pairs = np.load(pairs_path)

        matrix = adata.X.toarray() if scipy.sparse.issparse(adata.X) else adata.X
        self.data = torch.tensor(matrix, dtype=torch.float32)

        self.times = torch.tensor(adata.obs['dpt_pseudotime'].values, dtype=torch.float32)

    def __len__(self):
        return len(self.pairs)

    def __getitem__(self, idx):
        i, j = self.pairs[idx]
        x_t, x_next = self.data[i], self.data[j]
        return {
            "x_t": x_t,
            "x_next": x_next,
            "delta": x_next - x_t,
            "dt": self.times[j] - self.times[i]
        }
celldreamer/data/download.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import urllib.request
3
+ import scanpy as sc
4
+
5
+
6
def collect_data():
    """Download the pancreas tutorial dataset into celldreamer/data/raw.

    Source: https://scanpy-tutorials.readthedocs.io/en/latest/integrating-data-using-ingest.html
    """
    os.makedirs("celldreamer/data/raw", exist_ok=True)

    url = "https://www.dropbox.com/s/qj1jlm9w10wmt0u/pancreas.h5ad?dl=1"
    save_path = "celldreamer/data/raw/panc8_raw.h5ad"
    urllib.request.urlretrieve(url, save_path)

    # Quick shape report to confirm the download is intact.
    adata = sc.read(save_path)
    print(f"{adata.shape[0]} cells x {adata.shape[1]} genes")
celldreamer/data/plots.py ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import scanpy as sc
2
+ import numpy as np
3
+ import matplotlib.pyplot as plt
4
+
5
+
6
def validate():
    """Render a sanity-check figure of pseudotime and sampled pair arrows.

    Left panel: UMAP colored by pseudotime (expected: blue-to-red gradient
    from early to late). Right panel: UMAP colored by cell type with arrows
    for a random sample of (t, t+1) pairs; many very long arrows indicate
    pairs jumping across UMAP space. Saves the figure under
    celldreamer/data/processed/.
    """
    adata = sc.read("celldreamer/data/processed/cleaned.h5ad")
    pairs = np.load("celldreamer/data/processed/full_set.npy")

    sc.tl.umap(adata)  # compute the UMAP embedding

    fig, axs = plt.subplots(1, 2, figsize=(15, 6))

    sc.pl.umap(adata, color='dpt_pseudotime', ax=axs[0], show=False, title="Pseudotime (Time)")
    sc.pl.umap(adata, color='celltype', ax=axs[1], show=False, title="Pairs (Arrows)")

    umap_coords = adata.obsm['X_umap']

    # Spot-check a random sample; if these look right we assume the rest do.
    # Fix: cap the sample size at the number of pairs — np.random.choice
    # with replace=False raises ValueError when asked for more than exist.
    n_samples = min(100, len(pairs))
    sample_indices = np.random.choice(len(pairs), n_samples, replace=False)
    for idx in sample_indices:
        i, j = pairs[idx]
        start = umap_coords[i]
        end = umap_coords[j]

        axs[1].arrow(start[0], start[1], end[0] - start[0], end[1] - start[1],
                     head_width=0.3, length_includes_head=True, color='black', alpha=0.5)

    plt.tight_layout()
    plt.savefig("celldreamer/data/processed/dataset_cell_futures.png")
celldreamer/data/process.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import scanpy as sc
2
+ import numpy as np
3
+ from sklearn.model_selection import train_test_split
4
+ import os
5
+ import warnings
6
+
7
+ warnings.filterwarnings("ignore", category=FutureWarning, module="anndata")
8
+ warnings.filterwarnings("ignore", message="Moving element from .uns")
9
+
10
def process():
    """Clean the raw data, compute pseudotime, and build (t, t+1) pairs.

    Filters cells/genes, computes a diffusion pseudotime rooted at a ductal
    cell, extracts forward-in-time neighbor pairs, and writes the
    train/val/test pair splits, the full pair set, and the cleaned AnnData
    to celldreamer/data/processed/.
    """
    os.makedirs("celldreamer/data/processed", exist_ok=True)

    adata = sc.read("celldreamer/data/raw/panc8_raw.h5ad")
    sc.pp.filter_cells(adata, min_genes=200)
    sc.pp.filter_genes(adata, min_cells=3)
    print(f"cleaned Shape: {adata.shape}")

    print("getting K-nearest neighbors")  # fix: "nieghbors" typo in log line
    sc.pp.pca(adata, n_comps=50)
    sc.pp.neighbors(adata, n_neighbors=30, n_pcs=20)
    sc.tl.diffmap(adata)

    # Pick a ductal cell as the pseudotime root (step-0 "stem" cell);
    # fall back to cell 0 when the lookup fails (e.g. missing column).
    try:
        root_candidates = np.where(adata.obs['celltype'].str.contains('ductal', case=False))[0]
        adata.uns['iroot'] = root_candidates[0] if len(root_candidates) > 0 else 0
    except Exception:  # fix: bare except also swallowed SystemExit/KeyboardInterrupt
        adata.uns['iroot'] = 0

    sc.tl.dpt(adata)

    # Build (t, t+1) pairs from neighbor-graph edges that move forward in time.
    print("creating pairs")
    graph = adata.obsp['connectivities']
    times = adata.obs['dpt_pseudotime'].values
    pairs = []

    rows, cols = graph.nonzero()
    for i, j in zip(rows, cols):
        t_i, t_j = times[i], times[j]

        # forward in time, max gap 0.1 so step sizes stay comparable
        if t_j > t_i and (t_j - t_i) < 0.1:
            pairs.append([i, j])

    pairs = np.array(pairs)
    # Fix: fail with a clear message instead of a cryptic sklearn error
    # inside train_test_split when no pairs were produced.
    if len(pairs) == 0:
        raise RuntimeError("No forward-in-time neighbor pairs found; cannot build splits.")

    train, temp = train_test_split(pairs, test_size=0.2, random_state=42)
    val, test = train_test_split(temp, test_size=0.5, random_state=42)

    np.save("celldreamer/data/processed/train_pairs.npy", train)
    np.save("celldreamer/data/processed/val_pairs.npy", val)
    np.save("celldreamer/data/processed/test_pairs.npy", test)
    print(f"Train({len(train)}), Val({len(val)}), Test({len(test)})")

    adata.write("celldreamer/data/processed/cleaned.h5ad")
    np.save("celldreamer/data/processed/full_set.npy", pairs)
celldreamer/data/stats/stats.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:542bb1069a0d55ba11cc26ffea8ab5e0b94f84198e9614fb68bedc5ddb38b267
3
+ size 20876
celldreamer/environments/environment_cpu.yml ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: celldreamer
2
+ channels:
3
+ - pytorch
4
+ - conda-forge
5
+ - defaults
6
+ dependencies:
7
+ - python=3.10
8
+ - pytorch
9
+ - torchvision
10
+ - torchaudio
11
+ - cpuonly
12
+ - numpy<2.0
13
+ - pandas
14
+ - scipy
15
+ - scikit-learn
16
+ - matplotlib
17
+ - seaborn
18
+ - scanpy
19
+ - python-igraph
20
+ - leidenalg
21
+ - tqdm
22
+ - jupyterlab
23
+ - pip
24
+ - pip:
25
+ - umap-learn
celldreamer/environments/environment_gpu.yml ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: celldreamer
2
+ channels:
3
+ - pytorch
4
+ - nvidia
5
+ - conda-forge
6
+ - defaults
7
+ dependencies:
8
+ - python=3.10
9
+ - pytorch
10
+ - torchvision
11
+ - torchaudio
12
+ - pytorch-cuda=11.8 # 12.1 for 40xx card
13
+ - numpy<2.0
14
+ - pandas
15
+ - scipy
16
+ - scikit-learn
17
+ - matplotlib
18
+ - seaborn
19
+ - scanpy
20
+ - python-igraph
21
+ - leidenalg
22
+ - tqdm
23
+ - jupyterlab
24
+ - pip
25
+ - tensorboard
26
+ - pip:
27
+ - umap-learn
28
+ - python-box
29
+ - yaml
celldreamer/logs/CellDreamer_V1_Panc8_20260124-172947/events.out.tfevents.1769304587.wifi-10-45-214-157.wifi.berkeley.edu.83075.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba268787e132a3ee99092028bcae8b0cc2a6737f3e37b428a101632ed03cf2e8
3
+ size 88
celldreamer/logs/CellDreamer_V1_Panc8_20260124-173010/events.out.tfevents.1769304610.wifi-10-45-214-157.wifi.berkeley.edu.83336.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82a58fcb7f8cac67888636752fdd96097d97f2ebd42843efb67bfe6e17ff11eb
3
+ size 84568
celldreamer/logs/CellDreamer_V1_Panc8_20260125-131802/events.out.tfevents.1769375882.wifi-10-45-214-157.wifi.berkeley.edu.13242.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42ffdfd63227943975a940d4386c61e4c4c84e454fe3976984dff168a790b4b0
3
+ size 88
celldreamer/models/__init__.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ import yaml
2
+ from box import Box
3
+
4
def load_config(path):
    """Load a YAML config file into an attribute-accessible Box.

    learning_rate and weight_decay are coerced to float explicitly —
    presumably because values like "25e-6" are parsed as strings by the
    YAML loader (no digit after the decimal point); TODO confirm.
    """
    with open(path, 'r') as f:
        config = Box(yaml.safe_load(f))

    config.learning_rate = float(config.learning_rate)
    config.weight_decay = float(config.weight_decay)
    return config
celldreamer/models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (521 Bytes). View file
 
celldreamer/models/__pycache__/__init__.cpython-313.pyc ADDED
Binary file (786 Bytes). View file
 
celldreamer/models/__pycache__/class_celldreamer.cpython-310.pyc ADDED
Binary file (2.73 kB). View file
 
celldreamer/models/__pycache__/evaluate.cpython-310.pyc ADDED
Binary file (3.45 kB). View file
 
celldreamer/models/__pycache__/least_squares_umap.cpython-310.pyc ADDED
Binary file (1.64 kB). View file
 
celldreamer/models/__pycache__/networks.cpython-310.pyc ADDED
Binary file (3.85 kB). View file
 
celldreamer/models/__pycache__/train.cpython-310.pyc ADDED
Binary file (3.52 kB). View file
 
celldreamer/models/class_celldreamer.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn.functional as F
3
+
4
+ from celldreamer.models.networks import CellDreamer
5
+
6
class ClassCellDreamer:
    """Training wrapper around the CellDreamer network.

    Owns the model, its Adam optimizer, and the loss computation
    (reconstruction MSE + dynamics KL + posterior KL with free bits).
    """

    def __init__(self, args):
        self.args = args
        self.device = args.device

        self.model = CellDreamer(
            device=torch.device(args.device),
            latent_dim=args.latent_dim,
            rnn_dim=args.rnn_dim,
            enc_hidden_dims=args.enc_hidden_dims,
            dec_hidden_dims=args.dec_hidden_dims,
            num_genes=args.num_genes
        )
        self.model.to(self.device)

        self.optimizer = torch.optim.Adam(
            self.model.parameters(),
            lr=args.learning_rate,
            weight_decay=args.weight_decay
        )
        self.kl_scale = getattr(args, 'kl_scale', 0.1)  # default 0.1

    def get_kl_loss(self, mean1, std1, mean2, std2):
        """Batch mean of KL(N(mean1, std1) || N(mean2, std2)), summed over latent dims."""
        dist1 = torch.distributions.Normal(mean1, std1)
        dist2 = torch.distributions.Normal(mean2, std2)
        return torch.distributions.kl_divergence(dist1, dist2).sum(dim=1).mean()

    def train_step(self, x_t, x_next, current_epoch, total_epochs):
        """Run one optimization step on a (x_t, x_next) batch.

        Returns a dict of scalar loss components plus the effective KL weight.
        """
        self.model.train()
        self.optimizer.zero_grad()

        # Linear KL warm-up over the first half of training.
        # Fix: guard against total_epochs < 2 — `total_epochs // 2` was 0
        # and the division raised ZeroDivisionError.
        warmup_period = max(1, total_epochs // 2)
        kl_weight = min(1.0, current_epoch / warmup_period)

        effective_kl = self.kl_scale * kl_weight

        outputs = self.model(x_t)
        # Target posterior for the next state; no gradients flow through it.
        with torch.no_grad():
            target_mean, target_std = self.model.encoder(x_next)

        recon_loss = F.mse_loss(outputs["recon_x"], x_t)

        # Dynamics KL: KL(posterior(x_next) || prior_next)
        dynamics_loss = self.get_kl_loss(
            target_mean, target_std,
            outputs["prior_next_mean"], outputs["prior_next_std"]
        )

        # Posterior-prior KL against N(0,1) — standard VAE regularization
        # added to prevent posterior collapse.
        zeros = torch.zeros_like(outputs["post_mean"])
        ones = torch.ones_like(outputs["post_std"])
        posterior_kl = self.get_kl_loss(
            outputs["post_mean"], outputs["post_std"],
            zeros, ones
        )

        # Free bits: enforce a minimum KL per dimension so the model keeps
        # at least some information capacity in the latent.
        free_bits_per_dim = 0.1  # minimum nats per dimension
        min_kl = free_bits_per_dim * outputs["post_mean"].shape[1]
        posterior_kl = torch.clamp(posterior_kl, min=min_kl)
        dynamics_loss = torch.clamp(dynamics_loss, min=min_kl)

        # Total: reconstruction + dynamics KL + posterior regularization.
        total_loss = recon_loss + (effective_kl * dynamics_loss) + (effective_kl * posterior_kl)

        total_loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
        self.optimizer.step()

        return {
            "loss": total_loss.item(),
            "recon_loss": recon_loss.item(),
            "dynamics_loss": dynamics_loss.item(),
            "posterior_kl": posterior_kl.item(),
            "kl_weight": effective_kl
        }

    def save(self, path):
        """Serialize the model weights to *path*."""
        torch.save(self.model.state_dict(), path)

    def load(self, path):
        """Load model weights from *path* onto the wrapper's device."""
        self.model.load_state_dict(torch.load(path, map_location=self.device))
celldreamer/models/evaluate.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import DataLoader
3
+ from tqdm import tqdm
4
+ import os
5
+ import numpy as np
6
+ import json
7
+ import argparse
8
+ import sys
9
+ import umap
10
+ import matplotlib.pyplot as plt
11
+
12
+ from celldreamer.models.class_celldreamer import ClassCellDreamer
13
+ from celldreamer.models import load_config
14
+
15
+
16
def evaluate(args):
    """Evaluate a trained CellDreamer checkpoint on the held-out test split.

    Computes the same loss terms as training (reconstruction MSE, dynamics
    KL, posterior KL with free bits), writes aggregate metrics to a JSON
    file under args.output_dir, and saves a UMAP plot of the test-set
    posterior means.

    args: config object with device, data_path, batch_size, model_type,
        checkpoint_path, kl_scale, output_dir, output_filename.
    Raises FileNotFoundError when the serialized test dataset is missing.
    """

    device = torch.device(args.device)

    os.makedirs(args.output_dir, exist_ok=True)

    test_path = f"{args.data_path}/test.pt"
    print(f"Loading test dataset from {test_path}...")

    if not os.path.exists(test_path):
        raise FileNotFoundError(f"Test dataset not found at {test_path}")

    # weights_only=False: the file stores a pickled Dataset object, not a
    # plain state dict. NOTE(review): unpickling is only safe for trusted
    # local files.
    test_ds = torch.load(test_path, weights_only=False)
    test_loader = DataLoader(test_ds, batch_size=args.batch_size, shuffle=False, num_workers=2)

    print(f"Test Size: {len(test_ds)} samples")

    print(f"Initializing Model: {args.model_type}")

    if args.model_type.lower() == "celldreamer":
        model_wrapper = ClassCellDreamer(args)
    else:
        raise ValueError(f"Unknown model type: {args.model_type}")

    model_wrapper.load(args.checkpoint_path)
    model_wrapper.model.eval()

    # Per-batch scalar losses, averaged at the end.
    test_recon_losses = []
    test_dynamics_losses = []
    test_posterior_kl_losses = []
    test_total_losses = []

    # Posterior means collected for the latent-space UMAP plot.
    all_latents = []

    print("Running inference...")
    with torch.no_grad():
        for batch in tqdm(test_loader, desc="Evaluating"):
            x_t = batch['x_t'].to(device)
            x_next = batch['x_next'].to(device)

            outputs = model_wrapper.model(x_t)

            # Target posterior for the next state, same as in training.
            target_mean, target_std = model_wrapper.model.encoder(x_next)
            recon_loss = torch.nn.functional.mse_loss(outputs["recon_x"], x_t)

            # Dynamics KL: KL(posterior(x_next) || predicted prior)
            dyn_loss = model_wrapper.get_kl_loss(
                target_mean, target_std,
                outputs["prior_next_mean"], outputs["prior_next_std"]
            )

            # Add posterior KL for consistency with training
            zeros = torch.zeros_like(outputs["post_mean"])
            ones = torch.ones_like(outputs["post_std"])
            post_kl = model_wrapper.get_kl_loss(
                outputs["post_mean"], outputs["post_std"],
                zeros, ones
            )

            # Apply same free bits constraint as training
            free_bits_per_dim = 0.1
            min_kl = free_bits_per_dim * outputs["post_mean"].shape[1]
            post_kl = torch.clamp(post_kl, min=min_kl)
            dyn_loss = torch.clamp(dyn_loss, min=min_kl)

            # Use same loss computation as training
            total_loss = recon_loss + (args.kl_scale * dyn_loss) + (args.kl_scale * post_kl)

            test_recon_losses.append(recon_loss.item())
            test_dynamics_losses.append(dyn_loss.item())
            test_posterior_kl_losses.append(post_kl.item())
            test_total_losses.append(total_loss.item())

            all_latents.append(outputs["post_mean"].cpu())

    metrics = {
        "model": args.model_type,
        "checkpoint": args.checkpoint_path,
        "test_samples": len(test_ds),
        "metrics": {
            "avg_total_loss": float(np.mean(test_total_losses)),
            "avg_recon_loss_mse": float(np.mean(test_recon_losses)),
            "avg_dynamics_loss_kl": float(np.mean(test_dynamics_losses)),
            "avg_posterior_kl": float(np.mean(test_posterior_kl_losses)),
            "std_total_loss": float(np.std(test_total_losses))
        }
    }

    print("Results:")
    print(f"MSE (Rec): {metrics['metrics']['avg_recon_loss_mse']:.6f}")
    print(f"KL (Dynamics/Dream): {metrics['metrics']['avg_dynamics_loss_kl']:.6f}")
    print(f"KL (Posterior): {metrics['metrics']['avg_posterior_kl']:.6f}")
    print(f"Total Loss: {metrics['metrics']['avg_total_loss']:.6f}")

    output_file_path = os.path.join(args.output_dir, args.output_filename)
    with open(output_file_path, 'w') as f:
        json.dump(metrics, f, indent=4)

    print(f"\nResults saved to: {output_file_path}")

    # 2-D UMAP of the posterior means for a qualitative look at the latent space.
    print("Generating UMAP visualization...")
    latents_tensor = torch.cat(all_latents)

    reducer = umap.UMAP(n_components=2)
    coords = reducer.fit_transform(latents_tensor.numpy())

    plt.figure(figsize=(10, 8))
    plt.scatter(coords[:, 0], coords[:, 1], s=1, alpha=0.5)
    plt.title("Latent Space Visualization")

    umap_path = os.path.join(args.output_dir, "latent_umap.png")
    plt.savefig(umap_path)
    plt.close()

    print(f"UMAP plot saved to {umap_path}")
130
+
131
+
132
if __name__ == "__main__":

    parser = argparse.ArgumentParser(description="Evaluation script for celldreamer")
    parser.add_argument(
        "--config",
        type=str,
        # Fix: the repository ships celldreamer/config/evaluate_config.yml;
        # the old default ("eval_config.yml") pointed at a non-existent file,
        # so running without --config always failed.
        default="celldreamer/config/evaluate_config.yml",
        help="Path to the YAML configuration file"
    )

    args = parser.parse_args()
    config = load_config(args.config)

    evaluate(config)
celldreamer/models/least_squares_umap.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import scanpy as sc
3
+ import os
4
+
5
+ from celldreamer.models.class_celldreamer import ClassCellDreamer
6
+ from celldreamer.models import load_config
7
+
8
def solve_projector():
    """Fit a linear map from the model's latent space to UMAP coordinates.

    Solves the least-squares system Z @ B = Y_umap and stores the result as
    a Linear-layer-style state dict (weight + zero bias) under
    celldreamer/data/artifacts/.
    """
    # Load data, normalization stats, and the trained model.
    adata = sc.read("celldreamer/data/processed/cleaned.h5ad")
    stats = torch.load("celldreamer/data/stats/stats.pt", weights_only=False)

    args = load_config("celldreamer/config/evaluate_config.yml")
    args.device = "cpu"
    wrapper = ClassCellDreamer(args)
    wrapper.model.load_state_dict(
        torch.load("celldreamer/checkpoints/best.pth", map_location="cpu", weights_only=True)
    )
    wrapper.model.eval()

    if 'X_umap' not in adata.obsm:
        sc.pp.neighbors(adata)
        sc.tl.umap(adata)

    Y_umap = torch.tensor(adata.obsm['X_umap'], dtype=torch.float32)

    # Prefer raw counts when available, otherwise the processed matrix.
    data = adata.raw[:, adata.var_names].X if adata.raw is not None else adata.X
    if hasattr(data, "toarray"):
        data = data.toarray()

    # Same preprocessing as training: log1p, z-score, clamp at 10.
    x_in = torch.log1p(torch.tensor(data, dtype=torch.float32))
    x_in = torch.clamp((x_in - stats["mean"]) / stats["std"], max=10.0)

    with torch.no_grad():
        Z_latent, _ = wrapper.model.encoder(x_in)

    # Least-squares solve of Z @ B = Y (normal equations X^T X b = X^T y).
    solution = torch.linalg.lstsq(Z_latent, Y_umap).solution

    state_dict = {
        "weight": solution.T,
        "bias": torch.zeros(2)  # intercept intentionally unused
    }

    os.makedirs("celldreamer/data/artifacts", exist_ok=True)
    torch.save(state_dict, "celldreamer/data/artifacts/projector_weights.pth")

if __name__ == "__main__":
    solve_projector()
celldreamer/models/networks.py ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ import torch.nn as nn
3
+ import torch.nn.functional as F
4
+
5
+
6
+ # define a mlp encoder
7
+ # inputs: batch x num_genes (2446)
8
+ # outputs: batch x ecoding_dim
9
# MLP encoder
# inputs: batch x num_genes (2446)
# outputs: batch x latent_dim posterior parameters
class Encoder(nn.Module):
    """Encode gene-expression vectors into a diagonal Gaussian posterior.

    Forward maps a (batch, num_genes) tensor to a (mean, std) pair, each of
    shape (batch, latent_dim).
    """

    def __init__(self, latent_dim, hidden_dims, num_genes=2446):
        super().__init__()

        blocks = []
        in_features = num_genes
        for width in hidden_dims:
            blocks.extend([
                nn.Linear(in_features, width),
                nn.BatchNorm1d(width),
                nn.ELU(),
                nn.Dropout(0.4),
            ])
            in_features = width

        self.enc_net = nn.Sequential(*blocks)

        # Two parallel heads produce the posterior parameters.
        self.fc_mean = nn.Linear(in_features, latent_dim)
        self.fc_std = nn.Linear(in_features, latent_dim)

    def forward(self, x_t):
        """Return (mean, std) of the approximate posterior for batch x_t."""
        features = self.enc_net(x_t)

        # softplus keeps std positive; the 1e-3 floor prevents it from
        # collapsing to near-zero (posterior collapse).
        return (
            self.fc_mean(features),
            F.softplus(self.fc_std(features)) + 1e-3,
        )
41
+
42
+
43
+ # define a corresponding mlp decoder
44
+ # input: batch x ecoding_dim + rnn_hidden_dim
45
# MLP decoder, mirror of the encoder
# input: batch x (latent_dim + rnn_hidden_dim)
class Decoder(nn.Module):
    """Reconstruct gene expression from the latent sample concatenated with
    the recurrent hidden state."""

    def __init__(self, latent_dim, rnn_hidden_dim, hidden_dims, num_genes=2446):
        super().__init__()

        modules = []
        width_in = latent_dim + rnn_hidden_dim
        for width_out in hidden_dims:
            modules.extend([
                nn.Linear(width_in, width_out),
                nn.BatchNorm1d(width_out),
                nn.ELU(),
                nn.Dropout(0.4),
            ])
            width_in = width_out

        # Final linear head back to gene space (no output activation).
        modules.append(nn.Linear(width_in, num_genes))
        self.dec_net = nn.Sequential(*modules)

    def forward(self, z, h):
        """Decode the concatenation [z, h] into a (batch, num_genes) tensor."""
        return self.dec_net(torch.cat([z, h], dim=1))
70
+
71
+ # define a gru-based rssm
72
+ # input: batch x ecoding_dim at t=0
73
+ # output: batch x 2*encoding_dim at t = 1 to get the mean and standard deviation
74
+
75
# GRU-based recurrent state-space model
# one step: (prev latent, prev hidden) -> (new hidden, mean, std)
class RSSM(nn.Module):
    """Predict the distribution over the next latent change from the
    recurrent state, advancing a GRU cell by one step per call."""

    def __init__(self, latent_dim, rnn_hidden_dim):
        super().__init__()

        self.latent_dim = latent_dim
        self.hidden_dim = rnn_hidden_dim

        self.gru = nn.GRUCell(latent_dim, rnn_hidden_dim)
        self.mlp = nn.Sequential(
            nn.Linear(rnn_hidden_dim, rnn_hidden_dim),
            nn.LayerNorm(rnn_hidden_dim),
            nn.ELU(),
            nn.Linear(rnn_hidden_dim, 2 * latent_dim)
        )

        # Small-gain Xavier init on the output head keeps early predictions
        # near zero while preserving gradient flow.
        nn.init.xavier_uniform_(self.mlp[3].weight, gain=0.1)
        nn.init.zeros_(self.mlp[3].bias)

    def forward(self, prev_r, prev_h):
        """Advance the recurrent state one step and emit prior statistics."""
        new_h = self.gru(prev_r, prev_h)

        stats = self.mlp(new_h)
        mean, raw_std = torch.chunk(stats, 2, dim=1)

        # softplus plus a 1e-3 floor keeps the std strictly positive.
        return new_h, mean, F.softplus(raw_std) + 1e-3
108
+
109
+
110
+ # create joint training architecture for dreamer
111
# create joint training architecture for dreamer
class CellDreamer(nn.Module):
    """World-model-style architecture: an Encoder (posterior over the latent
    state), an RSSM (one-step latent dynamics prior) and a Decoder
    (reconstruction of gene expression).
    """

    def __init__(
        self,
        device,
        latent_dim = 20,
        rnn_dim = 64,
        # Tuples instead of lists: immutable defaults avoid the shared
        # mutable-default-argument pitfall. Callers passing lists still work.
        enc_hidden_dims = (128, 64, 32),
        dec_hidden_dims = (32, 64, 128),
        num_genes = 2446
    ):
        super().__init__()

        self.encoder = Encoder(latent_dim, enc_hidden_dims, num_genes)
        self.decoder = Decoder(latent_dim, rnn_dim, dec_hidden_dims, num_genes)
        self.rssm = RSSM(latent_dim, rnn_dim)

        self.rnn_dim = rnn_dim
        self.latent_dim = latent_dim
        self.input_dim = num_genes
        self.device = device

    def reparametrize(self, mean, std):
        """Sample z = mean + eps * std with eps ~ N(0, I) (reparameterization trick)."""
        eps = torch.randn_like(std)
        return mean + eps * std

    def forward(self, x_t):
        """Encode x_t, roll the latent one step through the RSSM and decode.

        Returns a dict with the reconstruction, posterior stats, the
        one-step-ahead prior stats, the sampled latent and the GRU state.
        """
        post_mean, post_std = self.encoder(x_t)
        z_t = self.reparametrize(post_mean, post_std)

        # Fresh hidden state per batch; allocate directly on the target
        # device instead of allocating on CPU and copying with .to().
        h_prev = torch.zeros(x_t.size(0), self.rnn_dim, device=self.device)

        h_next, velocity_mean, velocity_std = self.rssm(z_t, h_prev)
        # The RSSM predicts a velocity; the prior over the next latent is a
        # residual update around the current sample.
        prior_next_mean = z_t + velocity_mean
        prior_next_std = velocity_std

        rec_x = self.decoder(z_t, h_next)

        return {
            "recon_x": rec_x,
            "post_mean": post_mean,
            "post_std": post_std,
            "prior_next_mean": prior_next_mean,
            "prior_next_std": prior_next_std,
            "z_t": z_t,
            "h_next": h_next
        }
160
+
161
+
162
+
celldreamer/models/train.py ADDED
@@ -0,0 +1,170 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from torch.utils.data import DataLoader
3
+ from torch.utils.tensorboard import SummaryWriter
4
+ from tqdm import tqdm
5
+ import os
6
+ import numpy as np
7
+ from datetime import datetime
8
+ import argparse
9
+
10
+ from celldreamer.models.class_celldreamer import ClassCellDreamer
11
+ from celldreamer.models import load_config
12
+
13
+
14
def train(args):
    """Train a CellDreamer model from a config namespace.

    Expects `args` to provide at least: device, save_dir, log_dir, run_name,
    data_path, batch_size, model_type, epochs, log_interval, save_freq.
    Side effects: writes TensorBoard logs under `log_dir` and checkpoints
    `last.pth` / `best.pth` under `save_dir`.
    """
    device = torch.device(args.device)

    # Make sure all output locations exist before any writes.
    os.makedirs(args.save_dir, exist_ok=True)
    os.makedirs(args.log_dir, exist_ok=True)

    # Timestamped run directory so repeated runs never clobber each other.
    timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
    writer = SummaryWriter(f"{args.log_dir}/{args.run_name}_{timestamp}")

    print(f"Loading datasets from {args.data_path}")

    # Pre-serialized Dataset objects (pickled, hence weights_only=False).
    # NOTE(review): weights_only=False unpickles arbitrary objects — only
    # load dataset files produced by this project's own pipeline.
    train_ds = torch.load(f"{args.data_path}/train.pt", weights_only=False)
    val_ds = torch.load(f"{args.data_path}/val.pt", weights_only=False)

    train_loader = DataLoader(train_ds, batch_size=args.batch_size, shuffle=True, num_workers=4, pin_memory=True)
    val_loader = DataLoader(val_ds, batch_size=args.batch_size, shuffle=False, num_workers=4, pin_memory=True)

    print(f"Train Size: {len(train_ds)} samples")
    print(f"Val Size: {len(val_ds)} samples")
    print(f"Model: {args.model_type}")

    # Only one model family is wired up; fail loudly on anything else.
    if args.model_type.lower() == "celldreamer":
        model_wrapper = ClassCellDreamer(args)
    else:
        raise ValueError(f"Unknown model type: {args.model_type}")

    global_step = 0
    best_val_loss = float('inf')
    best_val_mse = float('inf') # Track best validation MSE separately

    for epoch in range(1, args.epochs + 1):

        # --- TRAIN ---
        model_wrapper.model.train()
        train_mse = []
        train_kl = []
        train_posterior_kl = []
        train_total = []

        loop = tqdm(train_loader, desc=f"Epoch {epoch}/{args.epochs} [Train]")

        for batch in loop:
            # Each batch carries a pair of consecutive time points.
            x_t = batch['x_t'].to(device)
            x_next = batch['x_next'].to(device)

            # Optimization step is delegated to the wrapper; it returns a
            # dict of scalar logs. NOTE(review): assumes 'loss',
            # 'recon_loss' and 'dynamics_loss' keys are always present.
            logs = model_wrapper.train_step(x_t, x_next, epoch, args.epochs)

            train_total.append(logs['loss'])
            train_mse.append(logs['recon_loss'])
            train_kl.append(logs['dynamics_loss'])
            # posterior_kl may be absent from older wrappers; default to 0.
            train_posterior_kl.append(logs.get('posterior_kl', 0))

            global_step += 1

            # Step-level TensorBoard logging at a configurable interval.
            if global_step % args.log_interval == 0:
                writer.add_scalar("Step/Total_Loss", logs['loss'], global_step)
                writer.add_scalar("Step/Recon_Loss", logs['recon_loss'], global_step)
                writer.add_scalar("Step/Dynamics_KL", logs['dynamics_loss'], global_step)
                writer.add_scalar("Step/Posterior_KL", logs.get('posterior_kl', 0), global_step)

            loop.set_postfix(loss=logs['loss'])

        # --- VALIDATION ---
        model_wrapper.model.eval()
        val_mse = []
        val_kl = []
        val_posterior_kl = []
        val_total = []

        with torch.no_grad():
            for batch in tqdm(val_loader, desc=f"Epoch {epoch}/{args.epochs} [Val]  "):
                x_t = batch['x_t'].to(device)
                x_next = batch['x_next'].to(device)

                outputs = model_wrapper.model(x_t)
                # Posterior over the *next* observation acts as the KL target.
                target_mean, target_std = model_wrapper.model.encoder(x_next)

                recon_loss = torch.nn.functional.mse_loss(outputs["recon_x"], x_t)
                dyn_loss = model_wrapper.get_kl_loss(
                    target_mean, target_std,
                    outputs["prior_next_mean"], outputs["prior_next_std"]
                )

                # Add posterior KL for consistency with training
                zeros = torch.zeros_like(outputs["post_mean"])
                ones = torch.ones_like(outputs["post_std"])
                post_kl = model_wrapper.get_kl_loss(
                    outputs["post_mean"], outputs["post_std"],
                    zeros, ones
                )

                # Apply same free bits constraint as training
                free_bits_per_dim = 0.1
                min_kl = free_bits_per_dim * outputs["post_mean"].shape[1]
                post_kl = torch.clamp(post_kl, min=min_kl)
                dyn_loss = torch.clamp(dyn_loss, min=min_kl)

                # Compute KL weight same as training.
                # NOTE(review): warmup_period = epochs // 2 is 0 when
                # args.epochs < 2, making the division below raise
                # ZeroDivisionError — confirm configs guarantee epochs >= 2.
                warmup_period = args.epochs // 2
                kl_weight = min(1.0, (epoch / warmup_period))
                effective_kl = model_wrapper.kl_scale * kl_weight
                total_val_loss = recon_loss + (effective_kl * dyn_loss) + (effective_kl * post_kl)

                val_total.append(total_val_loss.item())
                val_mse.append(recon_loss.item())
                val_kl.append(dyn_loss.item())
                val_posterior_kl.append(post_kl.item())

        # --- STATS ---
        avg_train_loss = np.mean(train_total)
        avg_val_loss = np.mean(val_total)

        # Epoch-level train/val curves on shared TensorBoard charts.
        writer.add_scalars("Epoch/MSE", {'Train': np.mean(train_mse), 'Val': np.mean(val_mse)}, epoch)
        writer.add_scalars("Epoch/Dynamics_KL", {'Train': np.mean(train_kl), 'Val': np.mean(val_kl)}, epoch)
        writer.add_scalars("Epoch/Posterior_KL", {'Train': np.mean(train_posterior_kl), 'Val': np.mean(val_posterior_kl)}, epoch)

        # Calculate KL contribution to understand why validation loss isn't dropping
        # (recomputed here with the same warmup schedule as the inner loop).
        warmup_period = args.epochs // 2
        kl_weight = min(1.0, (epoch / warmup_period))
        effective_kl = model_wrapper.kl_scale * kl_weight
        val_kl_contribution = effective_kl * (np.mean(val_kl) + np.mean(val_posterior_kl))
        train_kl_contribution = effective_kl * (np.mean(train_kl) + np.mean(train_posterior_kl))

        print(f"Stats: Train MSE: {np.mean(train_mse):.4f} | Val MSE: {np.mean(val_mse):.4f} | Train Dyn KL: {np.mean(train_kl):.4f} | Val Dyn KL: {np.mean(val_kl):.4f} | Train Post KL: {np.mean(train_posterior_kl):.4f} | Val Post KL: {np.mean(val_posterior_kl):.4f}")
        print(f"Loss Breakdown: Train Total: {avg_train_loss:.4f} (MSE: {np.mean(train_mse):.4f} + KL: {train_kl_contribution:.4f}) | Val Total: {avg_val_loss:.4f} (MSE: {np.mean(val_mse):.4f} + KL: {val_kl_contribution:.4f}) | KL Weight: {effective_kl:.6f}")

        # Periodic rolling checkpoint (overwritten each time).
        if epoch % args.save_freq == 0:
            model_wrapper.save(f"{args.save_dir}/last.pth")

        avg_val_mse = np.mean(val_mse)
        if avg_val_loss < best_val_loss:
            print(f"Best Total Loss: ({best_val_loss:.4f} -> {avg_val_loss:.4f})")
            best_val_loss = avg_val_loss

        # Also track best validation MSE (more meaningful metric);
        # note: best.pth is gated on MSE, not on total loss.
        if avg_val_mse < best_val_mse:
            print(f"Best Val MSE: ({best_val_mse:.4f} -> {avg_val_mse:.4f}) - Saving best model")
            best_val_mse = avg_val_mse
            model_wrapper.save(f"{args.save_dir}/best.pth")

    writer.close()
155
+
156
+
157
if __name__ == "__main__":

    # Fixed typos in user-facing strings: "trainig" -> "training",
    # "YmML" -> "YAML".
    parser = argparse.ArgumentParser(description="training script for celldreamer")
    parser.add_argument(
        "--config",
        type=str,
        default="celldreamer/config/train_config.yml",
        help="Path to the YAML configuration file (default: celldreamer/config/train_config.yml)"
    )

    args = parser.parse_args()
    config = load_config(args.config)

    train(config)
celldreamer/results/latent_umap.png ADDED
celldreamer/results/test_metrics.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "model": "celldreamer",
3
+ "checkpoint": "celldreamer/checkpoints/best.pth",
4
+ "test_samples": 18253,
5
+ "metrics": {
6
+ "avg_total_loss": 0.6892188849982682,
7
+ "avg_recon_loss_mse": 0.6890018098837846,
8
+ "avg_dynamics_loss_kl": 21.70746588540244,
9
+ "std_total_loss": 0.03752287398763396
10
+ }
11
+ }
celldreamer/scripts/data.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
#!/bin/bash
# Build the processed datasets by running the data package's module code.
# Fail fast on any error or unset variable.
set -euo pipefail

# NOTE(review): `python -m pkg.__init__` imports the package twice and emits
# a RuntimeWarning; the clean fix is adding celldreamer/data/__main__.py and
# invoking `python -m celldreamer.data`. Kept as-is since no __main__.py
# exists yet.
python -m celldreamer.data.__init__
celldreamer/scripts/evaluate.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
#!/bin/bash
# Evaluate a trained model. Usage: evaluate.sh <config.yml>
# Fail fast on any error or unset variable ($1 missing now errors clearly).
set -euo pipefail

# Quote "$1" so config paths containing spaces survive word splitting.
python -m celldreamer.models.evaluate --config "$1"
celldreamer/scripts/train.sh ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
#!/bin/bash
# Train a model, then fit the UMAP projector. Usage: train.sh <config.yml>
# set -e ensures the projector fit is skipped when training fails
# (previously it ran unconditionally on a possibly stale checkpoint).
set -euo pipefail

# Quote "$1" so config paths containing spaces survive word splitting.
python -m celldreamer.models.train --config "$1"

python -m celldreamer.models.least_squares_umap
master.ipynb ADDED
@@ -0,0 +1,241 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "id": "d6fc963a",
7
+ "metadata": {},
8
+ "outputs": [],
9
+ "source": [
10
+ "%load_ext autoreload"
11
+ ]
12
+ },
13
+ {
14
+ "cell_type": "code",
15
+ "execution_count": 2,
16
+ "id": "6cf002c0",
17
+ "metadata": {},
18
+ "outputs": [],
19
+ "source": [
20
+ "%autoreload 2"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 15,
26
+ "id": "5e29d1c0",
27
+ "metadata": {},
28
+ "outputs": [],
29
+ "source": [
30
+ "import torch\n",
31
+ "\n",
32
+ "ds = torch.load(\"/Users/rohitkulkarni/Documents/projects/CellDreamer/backend/celldreamer/data/datasets/train.pt\", weights_only=False)"
33
+ ]
34
+ },
35
+ {
36
+ "cell_type": "code",
37
+ "execution_count": 16,
38
+ "id": "ebe6280f",
39
+ "metadata": {},
40
+ "outputs": [
41
+ {
42
+ "data": {
43
+ "text/plain": [
44
+ "torch.Size([2446])"
45
+ ]
46
+ },
47
+ "execution_count": 16,
48
+ "metadata": {},
49
+ "output_type": "execute_result"
50
+ }
51
+ ],
52
+ "source": [
53
+ "ds[0][\"x_t\"].shape"
54
+ ]
55
+ },
56
+ {
57
+ "cell_type": "code",
58
+ "execution_count": 1,
59
+ "id": "f9454346",
60
+ "metadata": {},
61
+ "outputs": [
62
+ {
63
+ "name": "stdout",
64
+ "output_type": "stream",
65
+ "text": [
66
+ "Calculating stats from data matrix...\n"
67
+ ]
68
+ }
69
+ ],
70
+ "source": [
71
+ "from celldreamer.data import get_data_stats\n",
72
+ "\n",
73
+ "get_data_stats()"
74
+ ]
75
+ },
76
+ {
77
+ "cell_type": "code",
78
+ "execution_count": 17,
79
+ "id": "8c8ff06c",
80
+ "metadata": {},
81
+ "outputs": [
82
+ {
83
+ "name": "stdout",
84
+ "output_type": "stream",
85
+ "text": [
86
+ "Loaded as API: https://robrokools-celldreamer-api.hf.space\n"
87
+ ]
88
+ },
89
+ {
90
+ "data": {
91
+ "text/plain": [
92
+ "array([[ 0.20221904, -0.10513306, -0.23988042, 0.1219071 , -0.31176904,\n",
93
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
94
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
95
+ " -0.00870946, -0.18495346, 0.0982306 , 0.19570428, 0.03290927,\n",
96
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
97
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
98
+ " 0.24255574, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
99
+ " 0.03532511, 0.0018872 , -0.07421678, -0.18519297, -0.09254473,\n",
100
+ " -0.18334997, -0.19211988, -0.07095522, 0.08980912, 0.09272885,\n",
101
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
102
+ " [ 0.20221904, -0.10513306, -0.23988041, 0.12190711, -0.31176903,\n",
103
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
104
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
105
+ " -0.00870946, -0.18495346, 0.0982306 , 0.19570431, 0.03290927,\n",
106
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
107
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
108
+ " 0.24255586, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
109
+ " 0.03532511, 0.0018872 , -0.0742168 , -0.18519297, -0.09254467,\n",
110
+ " -0.18334997, -0.19211988, -0.07095522, 0.08980912, 0.09272885,\n",
111
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
112
+ " [ 0.20221904, -0.10513306, -0.23988041, 0.12190713, -0.31176903,\n",
113
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
114
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
115
+ " -0.00870946, -0.18495346, 0.0982306 , 0.19570434, 0.03290927,\n",
116
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
117
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
118
+ " 0.24255598, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
119
+ " 0.03532511, 0.0018872 , -0.07421681, -0.18519297, -0.09254462,\n",
120
+ " -0.18334997, -0.19211989, -0.07095522, 0.08980912, 0.09272885,\n",
121
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
122
+ " [ 0.20221904, -0.10513306, -0.2398804 , 0.12190714, -0.31176902,\n",
123
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
124
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
125
+ " -0.00870946, -0.18495345, 0.0982306 , 0.19570437, 0.03290927,\n",
126
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
127
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
128
+ " 0.2425561 , 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
129
+ " 0.03532511, 0.0018872 , -0.07421683, -0.18519297, -0.09254456,\n",
130
+ " -0.18334997, -0.1921199 , -0.07095522, 0.08980912, 0.09272885,\n",
131
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
132
+ " [ 0.20221904, -0.10513306, -0.23988039, 0.12190716, -0.31176901,\n",
133
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
134
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
135
+ " -0.00870946, -0.18495345, 0.0982306 , 0.1957044 , 0.03290927,\n",
136
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
137
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
138
+ " 0.24255621, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
139
+ " 0.03532511, 0.0018872 , -0.07421684, -0.18519297, -0.0925445 ,\n",
140
+ " -0.18334997, -0.1921199 , -0.07095522, 0.08980912, 0.09272885,\n",
141
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
142
+ " [ 0.20221904, -0.10513306, -0.23988038, 0.12190717, -0.311769 ,\n",
143
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
144
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
145
+ " -0.00870946, -0.18495345, 0.0982306 , 0.19570443, 0.03290927,\n",
146
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
147
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
148
+ " 0.24255633, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
149
+ " 0.03532511, 0.0018872 , -0.07421686, -0.18519297, -0.09254444,\n",
150
+ " -0.18334997, -0.19211991, -0.07095522, 0.08980912, 0.09272885,\n",
151
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
152
+ " [ 0.20221904, -0.10513306, -0.23988038, 0.12190719, -0.311769 ,\n",
153
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
154
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
155
+ " -0.00870946, -0.18495344, 0.0982306 , 0.19570446, 0.03290927,\n",
156
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
157
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
158
+ " 0.24255645, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
159
+ " 0.03532511, 0.0018872 , -0.07421687, -0.18519297, -0.09254438,\n",
160
+ " -0.18334997, -0.19211992, -0.07095522, 0.08980912, 0.09272885,\n",
161
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
162
+ " [ 0.20221904, -0.10513306, -0.23988037, 0.1219072 , -0.31176899,\n",
163
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
164
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
165
+ " -0.00870946, -0.18495344, 0.0982306 , 0.19570449, 0.03290927,\n",
166
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
167
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
168
+ " 0.24255657, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
169
+ " 0.03532511, 0.0018872 , -0.07421689, -0.18519297, -0.09254432,\n",
170
+ " -0.18334997, -0.19211993, -0.07095522, 0.08980912, 0.09272885,\n",
171
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
172
+ " [ 0.20221904, -0.10513306, -0.23988036, 0.12190722, -0.31176898,\n",
173
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
174
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
175
+ " -0.00870946, -0.18495343, 0.0982306 , 0.19570452, 0.03290927,\n",
176
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
177
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
178
+ " 0.24255669, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
179
+ " 0.03532511, 0.0018872 , -0.0742169 , -0.18519297, -0.09254426,\n",
180
+ " -0.18334997, -0.19211993, -0.07095522, 0.08980912, 0.09272885,\n",
181
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041],\n",
182
+ " [ 0.20221904, -0.10513306, -0.23988035, 0.12190723, -0.31176898,\n",
183
+ " -0.07312202, -0.17483664, 0.34703633, -0.14286399, 0.01501414,\n",
184
+ " 0.24577391, -0.17025626, -0.01052079, -0.16482973, 0.01907933,\n",
185
+ " -0.00870946, -0.18495343, 0.0982306 , 0.19570455, 0.03290927,\n",
186
+ " -0.08225775, -0.14782619, -0.00959128, -0.04247084, -0.09117351,\n",
187
+ " 0.02470946, -0.0560773 , -0.0605984 , -0.18847048, 0.06813312,\n",
188
+ " 0.24255681, 0.15523338, 0.01986483, -0.23465055, -0.02495009,\n",
189
+ " 0.03532511, 0.0018872 , -0.07421692, -0.18519297, -0.0925442 ,\n",
190
+ " -0.18334997, -0.19211994, -0.07095522, 0.08980912, 0.09272885,\n",
191
+ " -0.00154805, -0.11791486, 0.3486139 , -0.21823978, 0.01764041]])"
192
+ ]
193
+ },
194
+ "execution_count": 17,
195
+ "metadata": {},
196
+ "output_type": "execute_result"
197
+ }
198
+ ],
199
+ "source": [
200
+ "from gradio_client import Client\n",
201
+ "import json\n",
202
+ "import numpy as np\n",
203
+ "\n",
204
+ "# 1. Connect to the Gradio Space\n",
205
+ "# Uses the same endpoint as your Flask app\n",
206
+ "client = Client(\"RobroKools/CellDreamer-API\")\n",
207
+ "\n",
208
+ "result_a = client.predict(\n",
209
+ " input_data={\"genes\": list(np.random.rand(2446)), \"steps\": 10} # Sending as list to be safe\n",
210
+ ")\n",
211
+ "\n",
212
+ "result_b = client.predict(\n",
213
+ " input_data={\"genes\": list(np.random.rand(2446)), \"steps\": 10}\n",
214
+ ")\n",
215
+ "\n",
216
+ "np.array(result_a[\"trajectory\"]) - np.array(result_b[\"trajectory\"])"
217
+ ]
218
+ }
219
+ ],
220
+ "metadata": {
221
+ "kernelspec": {
222
+ "display_name": "celldreamer",
223
+ "language": "python",
224
+ "name": "python3"
225
+ },
226
+ "language_info": {
227
+ "codemirror_mode": {
228
+ "name": "ipython",
229
+ "version": 3
230
+ },
231
+ "file_extension": ".py",
232
+ "mimetype": "text/x-python",
233
+ "name": "python",
234
+ "nbconvert_exporter": "python",
235
+ "pygments_lexer": "ipython3",
236
+ "version": "3.10.19"
237
+ }
238
+ },
239
+ "nbformat": 4,
240
+ "nbformat_minor": 5
241
+ }
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ gradio
3
+ numpy<2.0
4
+ python-box
5
+ pyyaml
6
+ pandas
7
+ scipy
8
+ scanpy