Spaces:

EngBioNUS
/

BitConverter

Sleeping

App Files Files Community

wenjun99 committed on Feb 23

Commit

62845fd

verified ·

1 Parent(s): 21c475f

Update src/app.py

Browse files

Files changed (1) hide show

src/app.py +176 -4

src/app.py CHANGED Viewed

@@ -5,6 +5,9 @@ import re
 import numpy as np
 import openpyxl
 import base64
 # =========================
 # Streamlit App Setup
@@ -141,7 +144,7 @@ def decode_from_binary(bits: list[int], scheme: str) -> str:
 # =========================
 # Tabs
 # =========================
-tab1, tab2, tab3 = st.tabs(["Encoding", "Decoding", "Writing"])
 # --------------------------------------------------
 # TAB 1: Text → Binary
@@ -295,11 +298,11 @@ with tab2:
             else:
                 recovered_text = decode_from_binary(bits, decode_scheme)
                 st.success(f"✅ Conversion complete using **{decode_scheme}**!")
-                st.markdown("**Decoded text:**")
                 st.text_area("Output", recovered_text, height=150)
                 st.download_button(
-                    "⬇️ Download Decoded Text (.txt)",
                     data=recovered_text,
                     file_name="recovered_text.txt",
                     mime="text/plain",
@@ -311,9 +314,178 @@ with tab2:
         st.info("👆 Upload a file to start the reverse conversion.")
 # --------------------------------------------------
-# TAB 3: Pipetting Command Generator
 # --------------------------------------------------
 with tab3:
     from math import ceil
     st.header("🧪 Pipetting Command Generator for Eppendorf epMotion liquid handler")

 import numpy as np
 import openpyxl
 import base64
+import matplotlib.pyplot as plt
+import matplotlib.colors as mcolors
+from scipy.stats import gaussian_kde
 # =========================
 # Streamlit App Setup
 # =========================
 # Tabs
 # =========================
+tab1, tab2, tab3, tab4 = st.tabs(["Encoding", "Decoding", "Data Analytics", "Writing"])
 # --------------------------------------------------
 # TAB 1: Text → Binary
             else:
                 recovered_text = decode_from_binary(bits, decode_scheme)
                 st.success(f"✅ Conversion complete using **{decode_scheme}**!")
+                st.markdown("**Recovered text:**")
                 st.text_area("Output", recovered_text, height=150)
                 st.download_button(
+                    "⬇️ Download Recovered Text (.txt)",
                     data=recovered_text,
                     file_name="recovered_text.txt",
                     mime="text/plain",
         st.info("👆 Upload a file to start the reverse conversion.")
 # --------------------------------------------------
+# TAB 3: Data Analytics
 # --------------------------------------------------
 with tab3:
+    # Tab title and short usage notes shown above the uploader.
+    st.header("📊 Data Analytics")
+    st.markdown("""
+    Upload your sample data file (Excel or CSV) for a quick exploratory assessment.
+    The file should contain samples as rows and position columns with editing values.
+    This tab provides visualizations **before** any binary labelling.
+    """)
+    # Uploader limited to .xlsx / .csv; the result is checked against None below,
+    # i.e. nothing is processed until a file has been provided.
+    analytics_uploaded = st.file_uploader(
+        "📤 Upload data file",
+        type=["xlsx", "csv"],
+        key="analytics_uploader"
+    )
+    if analytics_uploaded is not None:
+        try:
+            # --- Load ---
+            if analytics_uploaded.name.endswith(".xlsx"):
+                adf = pd.read_excel(analytics_uploaded)
+            else:
+                adf = pd.read_csv(analytics_uploaded)
+            st.success(f"✅ Loaded file with {len(adf)} rows and {len(adf.columns)} columns")
+            adf.columns = [str(c).strip() for c in adf.columns]
+            # --- Detect position columns ---
+            non_pos_keywords = {"sample", "description", "descritpion", "total edited",
+                                'volume per "1"', "volume per 1", "id", "name"}
+            position_cols = [c for c in adf.columns
+                            if c.lower() not in non_pos_keywords
+                            and pd.to_numeric(adf[c], errors="coerce").notna().any()]
+            def pos_sort_key(col_name: str):
+                m = re.search(r"(\d+)", col_name)
+                return int(m.group(1)) if m else 10**9
+            position_cols = sorted(position_cols, key=pos_sort_key)
+            if not position_cols:
+                st.error("No numeric position columns detected.")
+                st.stop()
+            st.info(f"Detected **{len(position_cols)}** position columns and **{len(adf)}** samples.")
+            # Convert position data to numeric
+            pos_data = adf[position_cols].apply(pd.to_numeric, errors="coerce").fillna(0.0)
+            # Compute Total edited (sum across positions per sample)
+            if "Total edited" in adf.columns:
+                total_edited = pd.to_numeric(adf["Total edited"], errors="coerce").fillna(0.0)
+            else:
+                total_edited = pos_data.sum(axis=1)
+            # =====================================================
+            # Shared controls for raw data plots
+            # =====================================================
+            st.markdown("### 1️⃣ Raw Data Distribution")
+            st.caption("Visualize editing values across all positions and samples — before any binary labelling.")
+            log_toggle = st.checkbox("Apply log1p transformation to values", value=False, key="log_toggle")
+            # Melt data to long format: (sample, position_index, value)
+            melted = pos_data.melt(var_name="Position", value_name="Value")
+            melted["Position_idx"] = melted["Position"].apply(
+                lambda x: int(re.search(r"(\d+)", str(x)).group(1)) if re.search(r"(\d+)", str(x)) else 0
+            )
+            if log_toggle:
+                melted["Value"] = np.log1p(melted["Value"])
+                value_label = "Editing Value (log1p)"
+            else:
+                value_label = "Editing Value"
+            # =====================================================
+            # PLOT 2: Histogram — all values
+            # =====================================================
+            st.markdown("#### 📊 Histogram — All Values")
+            n_bins = st.slider("Number of bins:", min_value=20, max_value=200, value=80, key="hist_bins")
+            fig2, ax2 = plt.subplots(figsize=(10, 4))
+            ax2.hist(melted["Value"].values, bins=n_bins, color="#4F46E5", edgecolor="white", linewidth=0.3)
+            ax2.set_xlabel(value_label)
+            ax2.set_ylabel("Count")
+            transform_label = "log1p" if log_toggle else "linear"
+            ax2.set_title(f"Raw Values Distribution ({transform_label})")
+            # Fine x-axis ticks: every 0.2 for log1p, every 5 for linear
+            val_max = melted["Value"].max()
+            if log_toggle:
+                ax2.set_xticks(np.arange(0, val_max + 0.2, 0.2))
+            else:
+                ax2.set_xticks(np.arange(0, val_max + 5, 5))
+            ax2.tick_params(axis='x', labelsize=8, rotation=45)
+            ax2.grid(axis='y', alpha=0.3)
+            fig2.tight_layout()
+            st.pyplot(fig2)
+            # =====================================================
+            # PLOT 3: FACS-style density scatter
+            # =====================================================
+            st.markdown("#### 2️⃣ Density Scatter Plot (FACS-style)")
+            st.caption("Each dot = one measurement (sample × position). Color = local point density.")
+            x_vals = melted["Position_idx"].values.astype(float)
+            y_vals = melted["Value"].values.astype(float)
+            # Add small jitter to x for visual separation
+            x_jittered = x_vals + np.random.default_rng(42).uniform(-0.3, 0.3, size=len(x_vals))
+            # Compute density
+            with st.spinner("Computing point density..."):
+                try:
+                    xy = np.vstack([x_jittered, y_vals])
+                    density = gaussian_kde(xy)(xy)
+                except np.linalg.LinAlgError:
+                    density = np.ones(len(x_vals))
+            # Sort by density so dense points render on top
+            sort_idx = density.argsort()
+            x_plot = x_jittered[sort_idx]
+            y_plot = y_vals[sort_idx]
+            d_plot = density[sort_idx]
+            fig3, ax3 = plt.subplots(figsize=(12, 6))
+            scatter = ax3.scatter(x_plot, y_plot, c=d_plot, cmap="jet", s=8, alpha=0.7, edgecolors="none")
+            cbar = fig3.colorbar(scatter, ax=ax3, label="Density")
+            ax3.set_xlabel("Position")
+            ax3.set_ylabel(value_label)
+            ax3.set_title(f"Density Scatter — Position vs. {value_label}")
+            ax3.set_xticks(sorted(melted["Position_idx"].unique()))
+            ax3.grid(alpha=0.2)
+            fig3.tight_layout()
+            st.pyplot(fig3)
+            # =====================================================
+            # PLOT 4: 2D Density Heatmap
+            # =====================================================
+            st.markdown("#### 3️⃣ 2D Density Heatmap")
+            st.caption("Binned heatmap of editing values by position — similar to a FACS density plot.")
+            y_bins = st.slider("Vertical bins:", min_value=20, max_value=150, value=60, key="heatmap_ybins")
+            positions_unique = sorted(melted["Position_idx"].unique())
+            n_positions = len(positions_unique)
+            fig4, ax4 = plt.subplots(figsize=(12, 6))
+            h = ax4.hist2d(
+                x_vals, y_vals,
+                bins=[n_positions, y_bins],
+                cmap="jet",
+                norm=mcolors.LogNorm() if melted["Value"].max() > 0 else None,
+            )
+            fig4.colorbar(h[3], ax=ax4, label="Count (log scale)")
+            ax4.set_xlabel("Position")
+            ax4.set_ylabel(value_label)
+            ax4.set_title(f"2D Density Heatmap — Position vs. {value_label}")
+            ax4.set_xticks(positions_unique)
+            ax4.grid(alpha=0.15)
+            fig4.tight_layout()
+            st.pyplot(fig4)
+        except Exception as e:
+            st.error(f"❌ Error processing file: {e}")
+            import traceback
+            st.code(traceback.format_exc())
+    else:
+        st.info("👆 Upload a data file (CSV or Excel) to start exploring.")
+# --------------------------------------------------
+# TAB 4: Pipetting Command Generator
+# --------------------------------------------------
+with tab4:
     from math import ceil
     st.header("🧪 Pipetting Command Generator for Eppendorf epMotion liquid handler")