wenjun99 committed on
Commit
62845fd
·
verified ·
1 Parent(s): 21c475f

Update src/app.py

Browse files
Files changed (1) hide show
  1. src/app.py +176 -4
src/app.py CHANGED
@@ -5,6 +5,9 @@ import re
5
  import numpy as np
6
  import openpyxl
7
  import base64
 
 
 
8
 
9
  # =========================
10
  # Streamlit App Setup
@@ -141,7 +144,7 @@ def decode_from_binary(bits: list[int], scheme: str) -> str:
141
  # =========================
142
  # Tabs
143
  # =========================
144
- tab1, tab2, tab3 = st.tabs(["Encoding", "Decoding", "Writing"])
145
 
146
  # --------------------------------------------------
147
  # TAB 1: Text → Binary
@@ -295,11 +298,11 @@ with tab2:
295
  else:
296
  recovered_text = decode_from_binary(bits, decode_scheme)
297
  st.success(f"✅ Conversion complete using **{decode_scheme}**!")
298
- st.markdown("**Decoded text:**")
299
  st.text_area("Output", recovered_text, height=150)
300
 
301
  st.download_button(
302
- "⬇️ Download Decoded Text (.txt)",
303
  data=recovered_text,
304
  file_name="recovered_text.txt",
305
  mime="text/plain",
@@ -311,9 +314,178 @@ with tab2:
311
  st.info("👆 Upload a file to start the reverse conversion.")
312
 
313
  # --------------------------------------------------
314
- # TAB 3: Pipetting Command Generator
315
  # --------------------------------------------------
316
  with tab3:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  from math import ceil
318
 
319
  st.header("🧪 Pipetting Command Generator for Eppendorf epMotion liquid handler")
 
5
  import numpy as np
6
  import openpyxl
7
  import base64
8
+ import matplotlib.pyplot as plt
9
+ import matplotlib.colors as mcolors
10
+ from scipy.stats import gaussian_kde
11
 
12
  # =========================
13
  # Streamlit App Setup
 
144
  # =========================
145
  # Tabs
146
  # =========================
147
+ tab1, tab2, tab3, tab4 = st.tabs(["Encoding", "Decoding", "Data Analytics", "Writing"])
148
 
149
  # --------------------------------------------------
150
  # TAB 1: Text → Binary
 
298
  else:
299
  recovered_text = decode_from_binary(bits, decode_scheme)
300
  st.success(f"✅ Conversion complete using **{decode_scheme}**!")
301
+ st.markdown("**Recovered text:**")
302
  st.text_area("Output", recovered_text, height=150)
303
 
304
  st.download_button(
305
+ "⬇️ Download Recovered Text (.txt)",
306
  data=recovered_text,
307
  file_name="recovered_text.txt",
308
  mime="text/plain",
 
314
  st.info("👆 Upload a file to start the reverse conversion.")
315
 
316
  # --------------------------------------------------
317
+ # TAB 3: Data Analytics
318
  # --------------------------------------------------
319
  with tab3:
320
+ st.header("📊 Data Analytics")
321
+ st.markdown("""
322
+ Upload your sample data file (Excel or CSV) for a quick exploratory assessment.
323
+ The file should contain samples as rows and position columns with editing values.
324
+ This tab provides visualizations **before** any binary labelling.
325
+ """)
326
+
327
+ analytics_uploaded = st.file_uploader(
328
+ "📤 Upload data file",
329
+ type=["xlsx", "csv"],
330
+ key="analytics_uploader"
331
+ )
332
+
333
+ if analytics_uploaded is not None:
334
+ try:
335
+ # --- Load ---
336
+ if analytics_uploaded.name.endswith(".xlsx"):
337
+ adf = pd.read_excel(analytics_uploaded)
338
+ else:
339
+ adf = pd.read_csv(analytics_uploaded)
340
+
341
+ st.success(f"✅ Loaded file with {len(adf)} rows and {len(adf.columns)} columns")
342
+ adf.columns = [str(c).strip() for c in adf.columns]
343
+
344
+ # --- Detect position columns ---
345
+ non_pos_keywords = {"sample", "description", "descritpion", "total edited",
346
+ 'volume per "1"', "volume per 1", "id", "name"}
347
+ position_cols = [c for c in adf.columns
348
+ if c.lower() not in non_pos_keywords
349
+ and pd.to_numeric(adf[c], errors="coerce").notna().any()]
350
+
351
+ def pos_sort_key(col_name: str):
352
+ m = re.search(r"(\d+)", col_name)
353
+ return int(m.group(1)) if m else 10**9
354
+ position_cols = sorted(position_cols, key=pos_sort_key)
355
+
356
+ if not position_cols:
357
+ st.error("No numeric position columns detected.")
358
+ st.stop()
359
+
360
+ st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}** samples.")
361
+
362
+ # Convert position data to numeric
363
+ pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
364
+
365
+ # Compute Total edited (sum across positions per sample)
366
+ if "Total edited" in adf.columns:
367
+ total_edited = pd.to_numeric(adf["Total edited"], errors="coerce").fillna(0.0)
368
+ else:
369
+ total_edited = pos_data.sum(axis=1)
370
+
371
+ # =====================================================
372
+ # Shared controls for raw data plots
373
+ # =====================================================
374
+ st.markdown("### 1️⃣ Raw Data Distribution")
375
+ st.caption("Visualize editing values across all positions and samples — before any binary labelling.")
376
+
377
+ log_toggle = st.checkbox("Apply log1p transformation to values", value=False, key="log_toggle")
378
+
379
+ # Melt data to long format: (sample, position_index, value)
380
+ melted = pos_data.melt(var_name="Position", value_name="Value")
381
+ melted["Position_idx"] = melted["Position"].apply(
382
+ lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
383
+ )
384
+ if log_toggle:
385
+ melted["Value"] = np.log1p(melted["Value"])
386
+ value_label = "Editing Value (log1p)"
387
+ else:
388
+ value_label = "Editing Value"
389
+
390
+ # =====================================================
391
+ # PLOT 2: Histogram — all values
392
+ # =====================================================
393
+ st.markdown("#### 📊 Histogram — All Values")
394
+
395
+ n_bins = st.slider("Number of bins:", min_value=20, max_value=200, value=80, key="hist_bins")
396
+
397
+ fig2, ax2 = plt.subplots(figsize=(10, 4))
398
+ ax2.hist(melted["Value"].values, bins=n_bins, color="#4F46E5", edgecolor="white", linewidth=0.3)
399
+ ax2.set_xlabel(value_label)
400
+ ax2.set_ylabel("Count")
401
+ transform_label = "log1p" if log_toggle else "linear"
402
+ ax2.set_title(f"Raw Values Distribution ({transform_label})")
403
+ # Fine x-axis ticks: every 0.2 for log1p, every 5 for linear
404
+ val_max = melted["Value"].max()
405
+ if log_toggle:
406
+ ax2.set_xticks(np.arange(0, val_max + 0.2, 0.2))
407
+ else:
408
+ ax2.set_xticks(np.arange(0, val_max + 5, 5))
409
+ ax2.tick_params(axis='x', labelsize=8, rotation=45)
410
+ ax2.grid(axis='y', alpha=0.3)
411
+ fig2.tight_layout()
412
+ st.pyplot(fig2)
413
+
414
+ # =====================================================
415
+ # PLOT 3: FACS-style density scatter
416
+ # =====================================================
417
+ st.markdown("#### 2️⃣ Density Scatter Plot (FACS-style)")
418
+ st.caption("Each dot = one measurement (sample × position). Color = local point density.")
419
+
420
+ x_vals = melted["Position_idx"].values.astype(float)
421
+ y_vals = melted["Value"].values.astype(float)
422
+
423
+ # Add small jitter to x for visual separation
424
+ x_jittered = x_vals + np.random.default_rng(42).uniform(-0.3, 0.3, size=len(x_vals))
425
+
426
+ # Compute density
427
+ with st.spinner("Computing point density..."):
428
+ try:
429
+ xy = np.vstack([x_jittered, y_vals])
430
+ density = gaussian_kde(xy)(xy)
431
+ except np.linalg.LinAlgError:
432
+ density = np.ones(len(x_vals))
433
+
434
+ # Sort by density so dense points render on top
435
+ sort_idx = density.argsort()
436
+ x_plot = x_jittered[sort_idx]
437
+ y_plot = y_vals[sort_idx]
438
+ d_plot = density[sort_idx]
439
+
440
+ fig3, ax3 = plt.subplots(figsize=(12, 6))
441
+ scatter = ax3.scatter(x_plot, y_plot, c=d_plot, cmap="jet", s=8, alpha=0.7, edgecolors="none")
442
+ cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
443
+ ax3.set_xlabel("Position")
444
+ ax3.set_ylabel(value_label)
445
+ ax3.set_title(f"Density Scatter — Position vs. {value_label}")
446
+ ax3.set_xticks(sorted(melted["Position_idx"].unique()))
447
+ ax3.grid(alpha=0.2)
448
+ fig3.tight_layout()
449
+ st.pyplot(fig3)
450
+
451
+ # =====================================================
452
+ # PLOT 4: 2D Density Heatmap
453
+ # =====================================================
454
+ st.markdown("#### 3️⃣ 2D Density Heatmap")
455
+ st.caption("Binned heatmap of editing values by position — similar to a FACS density plot.")
456
+
457
+ y_bins = st.slider("Vertical bins:", min_value=20, max_value=150, value=60, key="heatmap_ybins")
458
+
459
+ positions_unique = sorted(melted["Position_idx"].unique())
460
+ n_positions = len(positions_unique)
461
+
462
+ fig4, ax4 = plt.subplots(figsize=(12, 6))
463
+ h = ax4.hist2d(
464
+ x_vals, y_vals,
465
+ bins=[n_positions, y_bins],
466
+ cmap="jet",
467
+ norm=mcolors.LogNorm() if melted["Value"].max() > 0 else None,
468
+ )
469
+ fig4.colorbar(h[3], ax=ax4, label="Count (log scale)")
470
+ ax4.set_xlabel("Position")
471
+ ax4.set_ylabel(value_label)
472
+ ax4.set_title(f"2D Density Heatmap — Position vs. {value_label}")
473
+ ax4.set_xticks(positions_unique)
474
+ ax4.grid(alpha=0.15)
475
+ fig4.tight_layout()
476
+ st.pyplot(fig4)
477
+
478
+ except Exception as e:
479
+ st.error(f"❌ Error processing file: {e}")
480
+ import traceback
481
+ st.code(traceback.format_exc())
482
+ else:
483
+ st.info("👆 Upload a data file (CSV or Excel) to start exploring.")
484
+
485
+ # --------------------------------------------------
486
+ # TAB 4: Pipetting Command Generator
487
+ # --------------------------------------------------
488
+ with tab4:
489
  from math import ceil
490
 
491
  st.header("🧪 Pipetting Command Generator for Eppendorf epMotion liquid handler")