Umair Khan
committed on
Commit
·
8b28b6f
1
Parent(s):
e48c2ab
first pass of .var preview
Browse files
app.py
CHANGED
|
@@ -50,6 +50,9 @@ PARQUET_EMB_COL = "tx1-70m"
|
|
| 50 |
OBS_NONE_OPTION = "(none)"
|
| 51 |
MAX_CATEGORIES = 50
|
| 52 |
|
|
|
|
|
|
|
|
|
|
| 53 |
# helper to read AnnData header
|
| 54 |
def read_anndata_header(fileobj):
|
| 55 |
adata = sc.read_h5ad(fileobj.name, backed="r")
|
|
@@ -68,6 +71,41 @@ def _pick_layer(adata, layer_name):
|
|
| 68 |
X = X.toarray()
|
| 69 |
return X
|
| 70 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
# helper to compute UMAP from given embeddings
|
| 72 |
def _compute_umap_from_emb(emb):
|
| 73 |
ad_umap = ad.AnnData(X=emb)
|
|
@@ -110,17 +148,30 @@ def ensure_dropdowns(fileobj):
|
|
| 110 |
return (
|
| 111 |
gr.Dropdown(choices=["<use .X>"], value="<use .X>"),
|
| 112 |
gr.Dropdown(choices=[], value=None),
|
|
|
|
|
|
|
| 113 |
)
|
| 114 |
try:
|
| 115 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
return (
|
| 117 |
gr.Dropdown(choices=["<use .X>"] + layers, value="<use .X>"),
|
| 118 |
-
gr.Dropdown(choices=
|
|
|
|
|
|
|
| 119 |
)
|
| 120 |
except Exception:
|
| 121 |
return (
|
| 122 |
gr.Dropdown(choices=["<use .X>"], value="<use .X>"),
|
| 123 |
gr.Dropdown(choices=[], value=None),
|
|
|
|
|
|
|
| 124 |
)
|
| 125 |
|
| 126 |
# draw an uncolored UMAP
|
|
@@ -562,6 +613,7 @@ with gr.Blocks(title=APP_TITLE, css=css) as demo:
|
|
| 562 |
layer_dd = gr.Dropdown(choices=["<use .X>"], value="<use .X>", label="Layer to use (default: .X)", scale=1)
|
| 563 |
with gr.Column(scale=1):
|
| 564 |
var_dd = gr.Dropdown(choices=[], value=None, label="Name of .var column with Ensembl gene IDs (or gene symbols)")
|
|
|
|
| 565 |
use_symbols_chk = gr.Checkbox(label="Selected .var column contains gene symbols (attempt conversion to Ensembl IDs)", value=False)
|
| 566 |
|
| 567 |
# run button
|
|
@@ -580,10 +632,40 @@ with gr.Blocks(title=APP_TITLE, css=css) as demo:
|
|
| 580 |
emb_parquet = gr.DownloadButton(label="Download embeddings (.parquet)")
|
| 581 |
adata_with_emb = gr.DownloadButton(label="Download AnnData with embeddings in .obsm (.h5ad)")
|
| 582 |
|
| 583 |
-
#
|
| 584 |
-
|
| 585 |
-
|
| 586 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 587 |
|
| 588 |
# wire UMAP recoloring
|
| 589 |
obs_dd.change(recolor_umap, inputs=[obs_dd, coords_state, h5ad_state], outputs=[umap_img], queue=False)
|
|
|
|
| 50 |
OBS_NONE_OPTION = "(none)"
|
| 51 |
MAX_CATEGORIES = 50
|
| 52 |
|
| 53 |
+
# constants for .var preview
|
| 54 |
+
VAR_PREVIEW_MAX = 5
|
| 55 |
+
|
| 56 |
# helper to read AnnData header
|
| 57 |
def read_anndata_header(fileobj):
|
| 58 |
adata = sc.read_h5ad(fileobj.name, backed="r")
|
|
|
|
| 71 |
X = X.toarray()
|
| 72 |
return X
|
| 73 |
|
| 74 |
+
# helper to summarize .var columns
|
| 75 |
+
def _summarize_var_columns(adata_var_df, preview_max=VAR_PREVIEW_MAX):
    """Build dropdown choices and per-column summaries for a ``.var`` table.

    Args:
        adata_var_df: Table whose columns are summarized (accessed through
            ``.columns`` and ``[col]``).
        preview_max: Maximum number of non-missing example values kept per
            column.

    Returns:
        A ``(choices, summaries)`` pair. ``choices`` is a list of
        ``(label, column_name)`` tuples in column order. ``summaries`` maps
        each column name to a dict with the keys ``"dtype"``, ``"nunique"``,
        ``"examples"`` and ``"example_text"``.
    """
    summaries = {}
    choices = []
    for col in adata_var_df.columns:
        s = adata_var_df[col]
        dtype = str(s.dtype)

        # Keep only the first few non-missing values *before* converting to
        # strings, so the whole column is not stringified for a short preview.
        # The object cast is kept so each value is rendered via its own str().
        ex = s.dropna().head(preview_max).astype(object).astype(str).tolist()

        # Full example text is stored in the summary; the dropdown label uses
        # a copy capped at 50 characters (47 + "...").
        example_text = ", ".join(ex) if ex else "—"
        preview = example_text
        if len(preview) > 50:
            preview = preview[:47] + "..."
        lbl = f"{col} · {dtype} · e.g. {preview}"

        summaries[col] = {
            "dtype": dtype,
            # NOTE(review): pd.unique does not drop missing values, so a column
            # with NaNs presumably counts them as a value — confirm intended.
            "nunique": int(pd.unique(s.astype(object)).size),
            "examples": ex,
            "example_text": example_text,
        }
        choices.append((lbl, col))
    return choices, summaries
|
| 94 |
+
|
| 95 |
+
# helper to format .var column preview in markdown
|
| 96 |
+
def _format_var_preview_md(col, summaries):
|
| 97 |
+
if not col or summaries is None or col not in summaries:
|
| 98 |
+
return "Select a .var column to see a preview."
|
| 99 |
+
info = summaries[col]
|
| 100 |
+
ex = info["examples"]
|
| 101 |
+
rows = "\n".join([f"| {i+1} | {v} |" for i, v in enumerate(ex)]) if ex else "| — | — |"
|
| 102 |
+
return (
|
| 103 |
+
f"**Column:** `{col}` \n"
|
| 104 |
+
f"**dtype:** `{info['dtype']}` · **unique:** {info['nunique']}\n\n"
|
| 105 |
+
f"**Examples (up to {len(ex)}):**\n\n"
|
| 106 |
+
f"| # | value |\n|---:|:------|\n{rows}"
|
| 107 |
+
)
|
| 108 |
+
|
| 109 |
# helper to compute UMAP from given embeddings
|
| 110 |
def _compute_umap_from_emb(emb):
|
| 111 |
ad_umap = ad.AnnData(X=emb)
|
|
|
|
| 148 |
return (
|
| 149 |
gr.Dropdown(choices=["<use .X>"], value="<use .X>"),
|
| 150 |
gr.Dropdown(choices=[], value=None),
|
| 151 |
+
gr.State({}),
|
| 152 |
+
gr.Markdown.update(value="")
|
| 153 |
)
|
| 154 |
try:
|
| 155 |
+
adata = sc.read_h5ad(fileobj.name, backed="r")
|
| 156 |
+
adata.var = adata.var.reset_index(drop=False, names="index")
|
| 157 |
+
layers = list(adata.layers.keys())
|
| 158 |
+
var_choices, var_summaries = _summarize_var_columns(adata.var)
|
| 159 |
+
del adata
|
| 160 |
+
gc.collect()
|
| 161 |
+
default_var = var_choices[0][1] if var_choices else None
|
| 162 |
+
preview_md = _format_var_preview_md(default_var, var_summaries) if default_var else ""
|
| 163 |
return (
|
| 164 |
gr.Dropdown(choices=["<use .X>"] + layers, value="<use .X>"),
|
| 165 |
+
gr.Dropdown(choices=var_choices, value=default_var),
|
| 166 |
+
gr.State(var_summaries)
|
| 167 |
+
gr.Markdown.update(value=preview_md)
|
| 168 |
)
|
| 169 |
except Exception:
|
| 170 |
return (
|
| 171 |
gr.Dropdown(choices=["<use .X>"], value="<use .X>"),
|
| 172 |
gr.Dropdown(choices=[], value=None),
|
| 173 |
+
gr.State({}),
|
| 174 |
+
gr.Markdown.update(value="")
|
| 175 |
)
|
| 176 |
|
| 177 |
# draw an uncolored UMAP
|
|
|
|
| 613 |
layer_dd = gr.Dropdown(choices=["<use .X>"], value="<use .X>", label="Layer to use (default: .X)", scale=1)
|
| 614 |
with gr.Column(scale=1):
|
| 615 |
var_dd = gr.Dropdown(choices=[], value=None, label="Name of .var column with Ensembl gene IDs (or gene symbols)")
|
| 616 |
+
var_preview_md = gr.Markdown("")
|
| 617 |
use_symbols_chk = gr.Checkbox(label="Selected .var column contains gene symbols (attempt conversion to Ensembl IDs)", value=False)
|
| 618 |
|
| 619 |
# run button
|
|
|
|
| 632 |
emb_parquet = gr.DownloadButton(label="Download embeddings (.parquet)")
|
| 633 |
adata_with_emb = gr.DownloadButton(label="Download AnnData with embeddings in .obsm (.h5ad)")
|
| 634 |
|
| 635 |
+
# when file changes, refresh dropdowns
|
| 636 |
+
var_summaries_state = gr.State({})
|
| 637 |
+
f_in.change(
|
| 638 |
+
ensure_dropdowns,
|
| 639 |
+
inputs=[f_in],
|
| 640 |
+
outputs=[layer_dd, var_dd, var_summaries_state, var_preview_md],
|
| 641 |
+
queue=False
|
| 642 |
+
)
|
| 643 |
+
|
| 644 |
+
# when var selection changes, update the preview markdown
|
| 645 |
+
def _update_var_preview(selected, summaries):
|
| 646 |
+
return _format_var_preview_md(selected, summaries or {})
|
| 647 |
+
var_dd.change(
|
| 648 |
+
_update_var_preview,
|
| 649 |
+
inputs=[var_dd, var_summaries_state],
|
| 650 |
+
outputs=[var_preview_md],
|
| 651 |
+
queue=False
|
| 652 |
+
)
|
| 653 |
+
|
| 654 |
+
# run pipeline on button click
|
| 655 |
+
evt = run_btn.click(
|
| 656 |
+
run_pipeline,
|
| 657 |
+
inputs=[f_in, layer_dd, var_dd, use_symbols_chk],
|
| 658 |
+
outputs=[umap_img, emb_parquet, adata_with_emb, layer_dd, var_dd, obs_dd, coords_state, h5ad_state],
|
| 659 |
+
queue=True
|
| 660 |
+
)
|
| 661 |
+
|
| 662 |
+
# refresh dropdowns after run
|
| 663 |
+
evt.then(
|
| 664 |
+
ensure_dropdowns,
|
| 665 |
+
inputs=[f_in],
|
| 666 |
+
outputs=[layer_dd, var_dd],
|
| 667 |
+
queue=False
|
| 668 |
+
)
|
| 669 |
|
| 670 |
# wire UMAP recoloring
|
| 671 |
obs_dd.change(recolor_umap, inputs=[obs_dd, coords_state, h5ad_state], outputs=[umap_img], queue=False)
|