Spaces:

CellularGenomicMedicine
/

PGT-AO

Sleeping

App Files Files Community

pcao committed on Jul 18

Commit

434009f

verified ·

1 Parent(s): fc81d35

Create app.py

Browse files

Files changed (1) hide show

app.py +219 -0

app.py ADDED Viewed

	@@ -0,0 +1,219 @@

+import os
+import re
+import numpy as np
+import pandas as pd
+import gradio as gr
+import plotly.graph_objects as go
+from plotly.subplots import make_subplots
+from collections import defaultdict
+from huggingface_hub import login
+from datasets import load_dataset
def hex_to_rgba(hex_color, alpha=1):
    """Convert a hex colour string into a CSS ``rgba(r,g,b,a)`` string.

    Args:
        hex_color: Colour such as ``"#ffe3c5"``; the leading ``#`` is optional.
            Shorthand forms (``"#rgb"`` / ``"#rgba"``) are expanded by doubling
            each digit. Only the first six digits of the (expanded) value are
            read, so an alpha channel embedded in the string is ignored.
        alpha: Opacity written verbatim as the fourth ``rgba`` component.

    Returns:
        A string of the form ``"rgba(R,G,B,alpha)"`` with decimal channels.

    Raises:
        ValueError: If the string does not contain enough valid hex digits.
    """
    digits = hex_color.lstrip('#')
    # CSS shorthand: "#abc" means "#aabbcc" (and "#abcd" means "#aabbccdd").
    if len(digits) in (3, 4):
        digits = ''.join(ch * 2 for ch in digits)
    r, g, b = (int(digits[start:start + 2], 16) for start in (0, 2, 4))
    return f"rgba({r},{g},{b},{alpha})"
def _stage_node_values(df, stage, preferred):
    """Return the node values for one stage: the preferred order, then any unlisted observed values."""
    listed = list(preferred)
    observed = {str(value) for value in df[stage].dropna().unique()}
    return listed + sorted(observed - set(listed))


def _count_stage_links(df, stages, nodes):
    """Count rows flowing between each pair of consecutive stages as Sankey link dicts."""
    links = []
    for src_stage, dst_stage in zip(stages, stages[1:]):
        # observed=True keeps categorical columns from yielding unobserved combinations.
        pair_counts = df.groupby([src_stage, dst_stage], observed=True).size()
        for (src_value, dst_value), count in pair_counts.items():
            links.append({
                "source": nodes[(src_stage, str(src_value))],
                "target": nodes[(dst_stage, str(dst_value))],
                "value": int(count),
            })
    return links


def create_sankey(df):
    """Build the three-row figure: Sankey diagram, sunburst and treemap.

    The Sankey diagram has one column of nodes per stage (transfer outcome
    columns followed by aberration descriptors) and one link per observed
    pair of values in consecutive stages, weighted by row count.

    Args:
        df: DataFrame containing the columns ``Transferred``, ``Pos_HCG``,
            ``Heart_action``, ``Birth``, ``Aberration``, ``Aff_Chrom``,
            ``Par_Or`` and ``Seg_Or``. The frame is copied, not modified.

    Returns:
        The Plotly figure produced by ``make_subplots`` with the Sankey,
        sunburst and treemap traces added and the layout applied.
    """
    # Work on a copy so the caller's frame is left untouched.
    df = df.copy()
    # Collapse detailed deletion/duplication descriptions into one category each.
    df['Aberration'] = df['Aberration'].replace({
        r'Deletion.*': 'Deletion',
        r'Duplication.*': 'Duplication'
    }, regex=True)

    stages = ["Transferred", "Pos_HCG", "Heart_action", "Birth", "Aberration", "Aff_Chrom", "Par_Or", "Seg_Or"]
    stage_labels = ["Transfer", "Implantation", "Pregnancy", "Live Birth", "Aberration", "Chromosome", "Parental Origin", "Segregation Origin"]
    category_orders = {
        "Transferred": ["Y", "N", "NA"],
        "Pos_HCG": ["Y", "N", "NA"],
        "Heart_action": ["Y", "N", "NA"],
        "Birth": ["Y", "N", "NA"],
        "Aberration": ["Monosomy", "Trisomy", "Tetraploidy", "Pentasomy", "Mixoploidy", "UPD", "Duplication", "Deletion"],
        "Par_Or": ["Maternal", "Paternal", "Both", "ND"],
        "Seg_Or": ["Meiotic I", "Meiotic II", "Mitotic", "ND"],
        "Aff_Chrom": [str(i) for i in range(1, 23)] + ["X", "GW"]
    }
    colors = {
        "Transferred": {"Y": "#ffe3c5", "N": "#b0cdc0"},
        "Pos_HCG": {"Y": "#ffe3c5", "N": "#8a9991"},
        "Heart_action": {"Y": "#ffe3c5", "N": "#8a9991"},
        "Birth": {"Y": "#ffe3c5", "N": "#8a9991"},
        "Aberration": {
            "Monosomy": "#FFFACD", "Trisomy": "#ffc966", "Tetraploidy": "#ffc966",
            "Pentasomy": "#ffc966", "Mixoploidy": "#ffc966", "UPD": "#d0b783",
            "Duplication": "#ffc966", "Deletion": "#FFFACD"
        },
        "Par_Or": {"Maternal": "#C77CFF", "Paternal": "#F37735", "Both": "#993300", "ND": "#b48585"},
        "Seg_Or": {"Meiotic I": "#9ECAE1", "Meiotic II": "#003366", "Mitotic": "#3182BD", "ND": "#b48585"},
        # Keys must match the un-padded chromosome names used in category_orders.
        "Aff_Chrom": {str(i): f"#{i * 123456 % 0xFFFFFF:06x}99" for i in range(1, 23)} |
                     {"X": "#66666699", "GW": "#CCCCCC99"}
    }

    # Build nodes: one unique index per (stage, value). Tuple keys are used
    # because several stage names themselves contain underscores.
    nodes = {}
    node_labels = []
    node_hex = []
    node_x = []
    node_y = []
    x_span = max(len(stages) - 1, 1)
    for stage_idx, stage in enumerate(stages):
        values = _stage_node_values(df, stage, category_orders.get(stage, []))
        for value_idx, value in enumerate(values):
            nodes[(stage, value)] = len(nodes)
            node_labels.append("Yes" if value == 'Y' else "No" if value == 'N' else value)
            node_hex.append(colors.get(stage, {}).get(value, "#A2A2A2"))
            # Stages are spread left-to-right; values are stacked top-to-bottom.
            node_x.append(stage_idx / x_span * 0.9 + 0.05)
            node_y.append(0.01 + (value_idx + 0.5) * 0.98 / len(values))
    node_colors = [hex_to_rgba(color) for color in node_hex]

    # Build links; each link takes its source node's colour at 40% opacity.
    links = _count_stage_links(df, stages, nodes)
    link_colors = [hex_to_rgba(node_hex[link["source"]], 0.4) for link in links]

    # Share of each link within the total outflow of its source node.
    source_totals = defaultdict(int)
    for link in links:
        source_totals[link["source"]] += link["value"]
    link_customdata = [[link["value"] / source_totals[link["source"]] * 100] for link in links]

    sankey_trace = go.Sankey(
        arrangement="snap",
        node=dict(
            pad=20,
            thickness=40,
            line=dict(color="black", width=0.8),
            label=node_labels,
            color=node_colors,
            x=node_x,
            y=node_y,
            hovertemplate='%{label}<extra></extra>'
        ),
        link=dict(
            source=[link["source"] for link in links],
            target=[link["target"] for link in links],
            value=[link["value"] for link in links],
            color=link_colors,
            customdata=link_customdata,
            hovertemplate='%{source.label} → %{target.label}<br>Count: %{value} (%{customdata[0]:.2f}%)<extra></extra>'
        )
    )

    # Column headings placed above the Sankey node columns.
    annotations = [
        dict(
            x=i / (len(stage_labels) - 1) * 0.88 + 0.068,
            y=1.02,
            text=label,
            showarrow=False,
            font=dict(size=18, family="Arial"),
            xanchor="center"
        ) for i, label in enumerate(stage_labels)
    ]

    fig = make_subplots(
        rows=3, cols=1,
        row_heights=[0.3, 0.35, 0.35],
        specs=[[{"type": "sankey"}], [{"type": "sunburst"}], [{"type": "treemap"}]],
        vertical_spacing=0.1
    )

    # NOTE(review): build_sunburst_trace is not defined in the visible part of
    # this file; it must be provided elsewhere and return a
    # (sunburst_trace, treemap_trace) pair.
    sunburst_trace, treemap_trace = build_sunburst_trace(df)
    fig.add_trace(sankey_trace, row=1, col=1)
    fig.add_trace(sunburst_trace, row=2, col=1)
    fig.add_trace(treemap_trace, row=3, col=1)

    fig.update_layout(
        title=dict(text="Embryo Aberration Analysis (PGT-AO Study)",
                  font=dict(size=26, family="Arial"),
                  x=0.5, y=0.98),
        width=1700,
        height=3000,
        font=dict(family="Arial", size=14),
        margin=dict(l=200, r=200, t=150, b=200),
        paper_bgcolor="white",
        plot_bgcolor="white",
        annotations=annotations
    )
    return fig
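# NOTE: the remaining helper functions were elided from this revision; in
# particular build_sunburst_trace(), which create_sankey() calls, is not
# defined in the visible part of this file.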
def _format_chromosome(value):
    """Return the display label for one ``Aff_Chrom`` entry (e.g. 1 -> 'Chr01')."""
    # Leave missing entries alone instead of turning them into 'Chrnan'/'ChrNA'.
    if pd.isna(value):
        return value
    text = str(value).strip()
    if text in ('', 'NA'):
        return value
    if text == 'GW':
        return 'GW'
    if text in ('X', 'Y'):
        return f'Chr{text}'
    # Whole-number floats (e.g. 1.0) are formatted as their integer value.
    if isinstance(value, float) and value.is_integer():
        text = str(int(value))
    return f'Chr{text.zfill(2)}'


def clean_data(df):
    """Return a cleaned copy of the raw PGT-AO table.

    Steps, in order:
      * ``Aff_Chrom`` values become ``'ChrNN'`` / ``'ChrX'`` / ``'ChrY'``
        labels; ``'GW'`` and missing values are left as they are.
      * The literal string ``'NA'`` is replaced by ``''`` in every column.
      * ``Transferred`` is normalised to ``'Y'`` / ``'N'`` (anything else,
        including missing values, becomes ``'N'``).
      * Missing or empty ``Pos_HCG`` / ``Heart_action`` / ``Birth`` values
        become ``'Not Applicable'``; ``Heart_action`` and ``Birth`` are also
        set to ``'Not Applicable'`` when a transfer had a negative hCG.
      * ``Birth`` is set to ``'N'`` when hCG was positive but no heart action
        was seen.
      * ``Par_Or`` value ``'ND'`` is spelled out as ``'Not Determinable'``.
      * The ``Percentage`` placeholder ``'Options not set correctly'``
        becomes NaN.

    Args:
        df: Raw DataFrame with the columns named above.

    Returns:
        A new DataFrame; the input is not modified.
    """
    df = df.copy()
    df['Aff_Chrom'] = df['Aff_Chrom'].map(_format_chromosome)
    df = df.replace('NA', '')

    outcome_cols = ['Pos_HCG', 'Heart_action', 'Birth']

    # A transfer with a negative hCG has no later outcomes to report.
    no_implantation = (df['Transferred'] == 'Y') & (df['Pos_HCG'] == 'N')
    df.loc[no_implantation, ['Heart_action', 'Birth']] = 'Not Applicable'

    # Anything that is not an explicit 'Y'/'N' (missing, 'N (deg)', ...) counts as 'N'.
    df['Transferred'] = df['Transferred'].where(df['Transferred'].isin(['Y', 'N']), 'N')

    # 'NA' was turned into '' above; treat those blanks as missing as well so
    # they receive the same 'Not Applicable' label as real NaNs.
    outcomes = df[outcome_cols]
    df[outcome_cols] = outcomes.mask(outcomes.eq('')).fillna('Not Applicable')

    # Use the same 'N' code as the rest of the outcome columns.
    no_heartbeat = (df['Pos_HCG'] == 'Y') & (df['Heart_action'] == 'N')
    df.loc[no_heartbeat, 'Birth'] = 'N'

    df['Par_Or'] = df['Par_Or'].replace('ND', 'Not Determinable')
    df['Percentage'] = df['Percentage'].replace('Options not set correctly', np.nan)
    return df
def create_footer():
    """Return the HTML snippet for the page footer.

    The markup is a fixed-position bar pinned to the bottom of the page that
    shows the copyright line plus "Website" and "Contact" links.
    """
    footer_html = """
    <div style="
        position: fixed; bottom: 0; left: 0; width: 100%; padding: 15px;
        background-color: #f8fafc; z-index: 1000; display: flex;
        align-items: center; justify-content: center; flex-wrap: wrap; gap: 10px;">
        <div style="text-align: center;">
            <p style="margin: 5px 0; color: #4b5563;">© 2025 CGM</p>
            <p style="margin: 0;">
                <a href="https://www.zamanilab.org/" target="_blank" style="color: #1d4ed8; text-decoration: none; font-weight: 500;">Website</a> |
                <a href="mailto:masoud.zamaniesteki@mumc.nl" style="color: #1d4ed8; text-decoration: none; font-weight: 500;">Contact</a>
            </p>
        </div>
    </div>
    """
    return footer_html
def main():
    """Load the PGT-AO dataset from the Hugging Face Hub and launch the dashboard.

    The access token is read from the ``hf_api_key`` environment variable.
    The CSV is loaded into a DataFrame, rendered through ``create_sankey`` and
    served in a Gradio ``Blocks`` app.
    """
    token = os.environ.get("hf_api_key")
    # Only log in when a token is configured: calling login() without one
    # falls back to an interactive prompt, which cannot be answered when the
    # app runs unattended. Without it, load_dataset relies on stored credentials.
    if token:
        login(token=token)
    dataset = load_dataset("CellularGenomicMedicine/pgt-ao", data_files="202507_PGTAO.csv", token=True)
    df = dataset['train'].to_pandas()

    with gr.Blocks(theme=gr.themes.Soft(), css="...") as demo:
        gr.Markdown('<h1 class="main-title">🧬 PGT-AO Study Data Visualization Dashboard</h1>')
        gr.Markdown("...")
        with gr.Group():
            gr.Markdown('<div class="chart-title">🌳 Hierarchical Treemap</div>')
            gr.Plot(create_sankey(df), show_label=False)
        with gr.Group():
            gr.Markdown('<div class="chart-title">💡 Key Insights</div>')
            gr.Markdown("...")
        gr.HTML(create_footer())
    demo.launch(show_error=True)


# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    main()