Spaces:

eaglelandsonce
/

Lokahi_Solutions

Sleeping

App Files Files Community

eaglelandsonce committed on Dec 8, 2024

Commit

1d766b2

verified ·

1 Parent(s): 1c0dc24

Create 15_Breast_Cancer_Data.py

Browse files

Files changed (1) hide show

pages/15_Breast_Cancer_Data.py +183 -0

pages/15_Breast_Cancer_Data.py ADDED Viewed

	@@ -0,0 +1,183 @@

+import streamlit as st
+import pandas as pd
+import numpy as np
# Seed NumPy's global RNG when the module is loaded so that every run draws
# the same random sequence (and therefore produces the same dataset).
np.random.seed(42)

# Column order of the generated DataFrame.
_PATIENT_COLUMNS = (
    'Patient ID',
    'Age',
    'Menopausal Status',
    'Tumor Size (cm)',
    'Lymph Node Involvement',
    'Tumor Grade',
    'Tumor Stage',
    'ER Status',
    'PR Status',
    'HER2 Status',
    'Ki-67 Level',
    'TNBC Status',
    'BRCA Mutation',
    'Overall Health',
    'Genomic Recurrence Score',
    'Treatment',
)


def _recommend_treatment(stage, er, her2, tnbc, brca, health, recurrence_score):
    """Return the treatment string for one patient; uses no randomness."""
    if stage in ('I', 'II'):
        if tnbc == 'Positive':
            plan = 'Surgery, Chemotherapy, and Radiation Therapy'
            if brca == 'Positive':
                plan += ', plus PARP Inhibitors'
            return plan
        if er == 'Positive' and recurrence_score != 'N/A':
            if recurrence_score == 'High':
                return 'Surgery, Chemotherapy, Hormone Therapy, and Radiation Therapy'
            if recurrence_score == 'Intermediate':
                return 'Surgery, Consider Chemotherapy, Hormone Therapy, and Radiation Therapy'
            return 'Surgery, Hormone Therapy, and Radiation Therapy'
        if her2 == 'Positive':
            return 'Surgery, HER2-Targeted Therapy, Chemotherapy, and Radiation Therapy'
        return 'Surgery, Chemotherapy, and Radiation Therapy'

    if stage == 'III':
        plan = 'Neoadjuvant Chemotherapy, Surgery, Radiation Therapy'
        if her2 == 'Positive':
            plan += ', HER2-Targeted Therapy'
        if er == 'Positive':
            plan += ', Hormone Therapy'
        return plan

    # Remaining case is stage IV: systemic options only for patients in good
    # health, palliative care alone otherwise.
    if health != 'Good':
        return 'Palliative Care Only'
    options = []
    if er == 'Positive':
        options.append('Hormone Therapy')
    if her2 == 'Positive':
        options.append('HER2-Targeted Therapy')
    if tnbc == 'Positive':
        options.append('Chemotherapy')
    return 'Systemic Therapy (' + ', '.join(options) + '), Palliative Care'


def _simulate_patient(patient_id):
    """Draw one synthetic patient and return it as a column-name -> value dict.

    The sequence of ``np.random`` calls (including which ones are skipped by
    short-circuiting) decides what a given seed produces, so the draws below
    must stay in this order.
    """
    # Age ~ Normal(60, 10), truncated to an int and clamped to [30, 80].
    age = int(np.random.normal(60, 10))
    age = max(30, min(age, 80))
    menopausal = 'Post-menopausal' if age >= 50 else 'Pre-menopausal'

    tumor_size = round(np.random.lognormal(mean=0.7, sigma=0.5), 2)
    is_large = tumor_size > 2.0

    # One uniform draw; larger tumours are more likely to be node-positive.
    node_positive_prob = 0.6 if is_large else 0.3
    lymph_node = 'Positive' if np.random.rand() < node_positive_prob else 'Negative'

    grade = np.random.choice([1, 2, 3], p=[0.1, 0.4, 0.5] if is_large else [0.3, 0.5, 0.2])

    # Stage from size and nodal status, then a 5% chance of overriding to IV.
    if lymph_node == 'Positive' or tumor_size > 5.0:
        stage = 'III'
    elif tumor_size <= 2.0:
        stage = 'I'
    else:
        stage = 'II'
    if np.random.rand() < 0.05:
        stage = 'IV'

    er = np.random.choice(['Positive', 'Negative'], p=[0.75, 0.25])
    # PR can only be positive when ER is; the draw is skipped for ER-negative.
    pr = 'Positive' if er == 'Positive' and np.random.rand() > 0.1 else 'Negative'

    her2 = np.random.choice(['Positive', 'Negative'], p=[0.3, 0.7] if grade == 3 else [0.15, 0.85])
    ki67 = 'High' if grade == 3 and np.random.rand() < 0.8 else 'Low'

    all_negative = er == 'Negative' and pr == 'Negative' and her2 == 'Negative'
    tnbc = 'Positive' if all_negative else 'Negative'

    brca = 'Positive' if (tnbc == 'Positive' or age < 40) and np.random.rand() < 0.2 else 'Negative'
    health = 'Good' if age < 65 and np.random.rand() < 0.9 else 'Poor'

    # A recurrence score is only drawn for ER-positive / HER2-negative cases.
    if er == 'Positive' and her2 == 'Negative':
        recurrence_score = np.random.choice(['Low', 'Intermediate', 'High'], p=[0.6, 0.3, 0.1])
    else:
        recurrence_score = 'N/A'

    return {
        'Patient ID': patient_id,
        'Age': age,
        'Menopausal Status': menopausal,
        'Tumor Size (cm)': tumor_size,
        'Lymph Node Involvement': lymph_node,
        'Tumor Grade': grade,
        'Tumor Stage': stage,
        'ER Status': er,
        'PR Status': pr,
        'HER2 Status': her2,
        'Ki-67 Level': ki67,
        'TNBC Status': tnbc,
        'BRCA Mutation': brca,
        'Overall Health': health,
        'Genomic Recurrence Score': recurrence_score,
        'Treatment': _recommend_treatment(stage, er, her2, tnbc, brca, health, recurrence_score),
    }


def generate_realistic_data(num_patients=100):
    """Generate a DataFrame of synthetic breast cancer patient records.

    Args:
        num_patients: Number of rows to generate. Patient IDs run from 1 to
            ``num_patients``. Zero yields an empty frame that still has all
            columns.

    Returns:
        A ``pandas.DataFrame`` with one row per patient and the columns
        listed in ``_PATIENT_COLUMNS``. Values are drawn from NumPy's global
        RNG, so the result is reproducible for a given seed.
    """
    records = [_simulate_patient(index + 1) for index in range(num_patients)]
    # Build column-wise so every column exists even when there are no rows.
    data = {
        column: [record[column] for record in records]
        for column in _PATIENT_COLUMNS
    }
    return pd.DataFrame(data)
# Function to generate fuzzy data
def generate_fuzzy_data(df, error_rate=0.1, protected_columns=('Patient ID',)):
    """Return a copy of ``df`` with random errors injected into some cells.

    ``int(rows * cols * error_rate)`` cells are picked uniformly at random
    (with replacement, so a cell can be hit more than once) and altered
    according to their current value:

    * ``'Post-menopausal'`` / ``'Pre-menopausal'`` and ``'Positive'`` /
      ``'Negative'`` strings are flipped to the opposite label; any other
      string is left alone.
    * Numbers receive Gaussian noise with a standard deviation of 10% of
      their magnitude. Noise for integer cells is rounded to a whole number
      so integer columns keep their dtype.
    * Booleans and any other value types are left unchanged.

    Args:
        df: Source DataFrame; it is not modified.
        error_rate: Fraction of the cell count used as the number of
            corruption attempts.
        protected_columns: Column labels that must never be altered
            (identifiers by default). Labels missing from ``df`` are ignored.

    Returns:
        A new DataFrame of the same shape as ``df``.
    """
    fuzzy_df = df.copy()
    num_rows, num_cols = fuzzy_df.shape
    protected_labels = tuple(protected_columns or ())
    protected_positions = {
        position
        for position, label in enumerate(fuzzy_df.columns)
        if label in protected_labels
    }

    # Introduce errors
    for _ in range(int(num_rows * num_cols * error_rate)):
        row = np.random.randint(0, num_rows)
        col = np.random.randint(0, num_cols)
        if col in protected_positions:
            continue
        value = fuzzy_df.iloc[row, col]

        if isinstance(value, str):
            if value in ['Post-menopausal', 'Pre-menopausal']:
                fuzzy_df.iloc[row, col] = 'Post-menopausal' if value == 'Pre-menopausal' else 'Pre-menopausal'
            elif value in ['Positive', 'Negative']:
                fuzzy_df.iloc[row, col] = 'Negative' if value == 'Positive' else 'Positive'
        elif isinstance(value, (bool, np.bool_)):
            # bool is an int subclass; flags are not numeric measurements.
            continue
        elif isinstance(value, (int, np.integer)):
            # Cells read from integer columns are NumPy integer scalars, which
            # are not instances of the built-in int, hence the explicit check.
            # abs() keeps the scale valid for negative values.
            noise = np.random.normal(0, 0.1 * abs(value))
            fuzzy_df.iloc[row, col] = value + int(round(noise))
        elif isinstance(value, (float, np.floating)):
            noise = np.random.normal(0, 0.1 * abs(value))
            fuzzy_df.iloc[row, col] = value + noise
    return fuzzy_df
def main():
    """Render the Streamlit page for generating and downloading the datasets.

    The generated frames are kept in ``st.session_state`` rather than being
    rendered only inside the ``st.button`` branch: any widget interaction,
    including clicking a download button, reruns the script, and on that
    rerun the "Generate Data" button reports ``False``. Without the stored
    copy the tables and the remaining download button would disappear after
    the first download.
    """
    error_rate = 0.1
    state_key = 'breast_cancer_datasets'

    st.title('Synthetic Data Generator: Clean and Fuzzy (Noisy)')
    st.write('This app generates synthetic breast cancer patient data and provides downloads for both clean and fuzzy datasets.')
    num_patients = st.number_input('Number of Patients to Generate', min_value=10, max_value=10000, value=100, step=10)

    if st.button('Generate Data'):
        clean = generate_realistic_data(int(num_patients))
        noisy = generate_fuzzy_data(clean, error_rate=error_rate)
        st.session_state[state_key] = (clean, noisy)

    datasets = st.session_state.get(state_key)
    if datasets is None:
        # Nothing generated yet in this session.
        return
    perfect_data, fuzzy_data = datasets

    st.subheader('Perfect Data')
    st.dataframe(perfect_data)
    st.download_button('Download Perfect Data', perfect_data.to_csv(index=False), file_name='perfect_data.csv')

    # The label is derived from the same rate that was passed to the generator.
    st.subheader(f'Fuzzy Data ({error_rate:.0%} Error Rate)')
    st.dataframe(fuzzy_data)
    st.download_button('Download Fuzzy Data', fuzzy_data.to_csv(index=False), file_name='fuzzy_data.csv')


if __name__ == '__main__':
    main()