Umair Khan committed on
Commit
8b28b6f
·
1 Parent(s): e48c2ab

first pass of .var preview

Browse files
Files changed (1) hide show
  1. app.py +88 -6
app.py CHANGED
@@ -50,6 +50,9 @@ PARQUET_EMB_COL = "tx1-70m"
50
  OBS_NONE_OPTION = "(none)"
51
  MAX_CATEGORIES = 50
52
 
 
 
 
53
  # helper to read AnnData header
54
  def read_anndata_header(fileobj):
55
  adata = sc.read_h5ad(fileobj.name, backed="r")
@@ -68,6 +71,41 @@ def _pick_layer(adata, layer_name):
68
  X = X.toarray()
69
  return X
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # helper to compute UMAP from given embeddings
72
  def _compute_umap_from_emb(emb):
73
  ad_umap = ad.AnnData(X=emb)
@@ -110,17 +148,30 @@ def ensure_dropdowns(fileobj):
110
  return (
111
  gr.Dropdown(choices=["<use .X>"], value="<use .X>"),
112
  gr.Dropdown(choices=[], value=None),
 
 
113
  )
114
  try:
115
- layers, var_cols, _ = read_anndata_header(fileobj)
 
 
 
 
 
 
 
116
  return (
117
  gr.Dropdown(choices=["<use .X>"] + layers, value="<use .X>"),
118
- gr.Dropdown(choices=var_cols, value=(var_cols[0] if var_cols else None)),
 
 
119
  )
120
  except Exception:
121
  return (
122
  gr.Dropdown(choices=["<use .X>"], value="<use .X>"),
123
  gr.Dropdown(choices=[], value=None),
 
 
124
  )
125
 
126
  # draw an uncolored UMAP
@@ -562,6 +613,7 @@ with gr.Blocks(title=APP_TITLE, css=css) as demo:
562
  layer_dd = gr.Dropdown(choices=["<use .X>"], value="<use .X>", label="Layer to use (default: .X)", scale=1)
563
  with gr.Column(scale=1):
564
  var_dd = gr.Dropdown(choices=[], value=None, label="Name of .var column with Ensembl gene IDs (or gene symbols)")
 
565
  use_symbols_chk = gr.Checkbox(label="Selected .var column contains gene symbols (attempt conversion to Ensembl IDs)", value=False)
566
 
567
  # run button
@@ -580,10 +632,40 @@ with gr.Blocks(title=APP_TITLE, css=css) as demo:
580
  emb_parquet = gr.DownloadButton(label="Download embeddings (.parquet)")
581
  adata_with_emb = gr.DownloadButton(label="Download AnnData with embeddings in .obsm (.h5ad)")
582
 
583
- # wire embedding pipeline
584
- f_in.change(ensure_dropdowns, inputs=[f_in], outputs=[layer_dd, var_dd], queue=False)
585
- evt = run_btn.click(run_pipeline, inputs=[f_in, layer_dd, var_dd, use_symbols_chk], outputs=[umap_img, emb_parquet, adata_with_emb, layer_dd, var_dd, obs_dd, coords_state, h5ad_state], queue=True)
586
- evt.then(ensure_dropdowns, inputs=[f_in], outputs=[layer_dd, var_dd], queue=False)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
587
 
588
  # wire UMAP recoloring
589
  obs_dd.change(recolor_umap, inputs=[obs_dd, coords_state, h5ad_state], outputs=[umap_img], queue=False)
 
50
  OBS_NONE_OPTION = "(none)"
51
  MAX_CATEGORIES = 50
52
 
53
+ # constants for .var preview
54
+ VAR_PREVIEW_MAX = 5
55
+
56
  # helper to read AnnData header
57
  def read_anndata_header(fileobj):
58
  adata = sc.read_h5ad(fileobj.name, backed="r")
 
71
  X = X.toarray()
72
  return X
73
 
74
+ # helper to summarize .var columns
75
def _summarize_var_columns(adata_var_df, preview_max=VAR_PREVIEW_MAX):
    """Build dropdown choices and per-column summaries for a ``.var`` table.

    Args:
        adata_var_df: DataFrame whose columns are summarized.
        preview_max: Maximum number of non-null example values kept per
            column.

    Returns:
        A ``(choices, summaries)`` tuple. ``choices`` is a list of
        ``(label, column_name)`` pairs where the label shows the column
        name, dtype, and a preview truncated to 50 characters.
        ``summaries`` maps each column name to a dict with the keys
        ``"dtype"``, ``"nunique"``, ``"examples"`` and ``"example_text"``.
        If a column name occurs more than once, the last occurrence wins
        in ``summaries``.
    """
    summaries = {}
    choices = []
    # Iterate by position: label-based lookup returns a DataFrame (which has
    # no ``.dtype``) when a column name is duplicated.
    for pos, col in enumerate(adata_var_df.columns):
        s = adata_var_df.iloc[:, pos]
        dtype = str(s.dtype)
        # Convert to object once and reuse it for both examples and the
        # unique count.
        as_object = s.astype(object)
        # Take the first few non-null values *before* stringifying so only
        # ``preview_max`` values are converted instead of the whole column.
        ex = as_object.dropna().head(preview_max).astype(str).tolist()
        example_text = ", ".join(ex) if ex else "—"
        preview = example_text
        if len(preview) > 50:
            preview = preview[:47] + "..."
        lbl = f"{col} · {dtype} · e.g. {preview}"
        summaries[col] = {
            "dtype": dtype,
            # Missing values are included in this count (no dropna here).
            "nunique": int(pd.unique(as_object).size),
            "examples": ex,
            "example_text": example_text,
        }
        choices.append((lbl, col))
    return choices, summaries
94
+
95
+ # helper to format .var column preview in markdown
96
+ def _format_var_preview_md(col, summaries):
97
+ if not col or summaries is None or col not in summaries:
98
+ return "Select a .var column to see a preview."
99
+ info = summaries[col]
100
+ ex = info["examples"]
101
+ rows = "\n".join([f"| {i+1} | {v} |" for i, v in enumerate(ex)]) if ex else "| — | — |"
102
+ return (
103
+ f"**Column:** `{col}` \n"
104
+ f"**dtype:** `{info['dtype']}` · **unique:** {info['nunique']}\n\n"
105
+ f"**Examples (up to {len(ex)}):**\n\n"
106
+ f"| # | value |\n|---:|:------|\n{rows}"
107
+ )
108
+
109
  # helper to compute UMAP from given embeddings
110
  def _compute_umap_from_emb(emb):
111
  ad_umap = ad.AnnData(X=emb)
 
148
  return (
149
  gr.Dropdown(choices=["<use .X>"], value="<use .X>"),
150
  gr.Dropdown(choices=[], value=None),
151
+ gr.State({}),
152
+ gr.Markdown.update(value="")
153
  )
154
  try:
155
+ adata = sc.read_h5ad(fileobj.name, backed="r")
156
+ adata.var = adata.var.reset_index(drop=False, names="index")
157
+ layers = list(adata.layers.keys())
158
+ var_choices, var_summaries = _summarize_var_columns(adata.var)
159
+ del adata
160
+ gc.collect()
161
+ default_var = var_choices[0][1] if var_choices else None
162
+ preview_md = _format_var_preview_md(default_var, var_summaries) if default_var else ""
163
  return (
164
  gr.Dropdown(choices=["<use .X>"] + layers, value="<use .X>"),
165
+ gr.Dropdown(choices=var_choices, value=default_var),
166
+ gr.State(var_summaries)
167
+ gr.Markdown.update(value=preview_md)
168
  )
169
  except Exception:
170
  return (
171
  gr.Dropdown(choices=["<use .X>"], value="<use .X>"),
172
  gr.Dropdown(choices=[], value=None),
173
+ gr.State({}),
174
+ gr.Markdown.update(value="")
175
  )
176
 
177
  # draw an uncolored UMAP
 
613
  layer_dd = gr.Dropdown(choices=["<use .X>"], value="<use .X>", label="Layer to use (default: .X)", scale=1)
614
  with gr.Column(scale=1):
615
  var_dd = gr.Dropdown(choices=[], value=None, label="Name of .var column with Ensembl gene IDs (or gene symbols)")
616
+ var_preview_md = gr.Markdown("")
617
  use_symbols_chk = gr.Checkbox(label="Selected .var column contains gene symbols (attempt conversion to Ensembl IDs)", value=False)
618
 
619
  # run button
 
632
  emb_parquet = gr.DownloadButton(label="Download embeddings (.parquet)")
633
  adata_with_emb = gr.DownloadButton(label="Download AnnData with embeddings in .obsm (.h5ad)")
634
 
635
# State holding the per-column .var summaries produced by ensure_dropdowns.
var_summaries_state = gr.State({})

# ensure_dropdowns returns four values (layer dropdown, .var dropdown,
# summaries state, preview markdown), so every event wired to it must list
# the same four outputs.
_dropdown_outputs = [layer_dd, var_dd, var_summaries_state, var_preview_md]

# when file changes, refresh dropdowns
f_in.change(
    ensure_dropdowns,
    inputs=[f_in],
    outputs=_dropdown_outputs,
    queue=False,
)


# when var selection changes, update the preview markdown
def _update_var_preview(selected, summaries):
    """Return the preview Markdown for ``selected``; tolerate empty state."""
    return _format_var_preview_md(selected, summaries or {})


var_dd.change(
    _update_var_preview,
    inputs=[var_dd, var_summaries_state],
    outputs=[var_preview_md],
    queue=False,
)

# run pipeline on button click
evt = run_btn.click(
    run_pipeline,
    inputs=[f_in, layer_dd, var_dd, use_symbols_chk],
    outputs=[umap_img, emb_parquet, adata_with_emb, layer_dd, var_dd, obs_dd, coords_state, h5ad_state],
    queue=True,
)

# refresh dropdowns after run (previously listed only two outputs, which did
# not match the four values ensure_dropdowns returns)
evt.then(
    ensure_dropdowns,
    inputs=[f_in],
    outputs=_dropdown_outputs,
    queue=False,
)
669
 
670
  # wire UMAP recoloring
671
  obs_dd.change(recolor_umap, inputs=[obs_dd, coords_state, h5ad_state], outputs=[umap_img], queue=False)