Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
# Pin NumPy's global random state so repeated runs of this script draw the
# same sequence of values.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)
# Column order of the generated table; also the keys of every patient record.
_PATIENT_COLUMNS = [
    'Patient ID',
    'Age',
    'Menopausal Status',
    'Tumor Size (cm)',
    'Lymph Node Involvement',
    'Tumor Grade',
    'Tumor Stage',
    'ER Status',
    'PR Status',
    'HER2 Status',
    'Ki-67 Level',
    'TNBC Status',
    'BRCA Mutation',
    'Overall Health',
    'Genomic Recurrence Score',
    'Treatment',
]


def _assign_stage(tumor_size, lymph_node):
    """Return stage 'I', 'II' or 'III' from tumor size (cm) and nodal status."""
    if lymph_node == 'Positive' or tumor_size > 5.0:
        return 'III'
    if tumor_size <= 2.0:
        return 'I'
    return 'II'


def _recommend_treatment(stage, er, her2, tnbc, brca, recurrence_score, health):
    """Return the treatment string for one patient's stage and marker profile."""
    if stage in ('I', 'II'):
        if tnbc == 'Positive':
            plan = 'Surgery, Chemotherapy, and Radiation Therapy'
            if brca == 'Positive':
                plan += ', plus PARP Inhibitors'
            return plan
        if er == 'Positive' and recurrence_score != 'N/A':
            if recurrence_score == 'High':
                return 'Surgery, Chemotherapy, Hormone Therapy, and Radiation Therapy'
            if recurrence_score == 'Intermediate':
                return 'Surgery, Consider Chemotherapy, Hormone Therapy, and Radiation Therapy'
            return 'Surgery, Hormone Therapy, and Radiation Therapy'
        if her2 == 'Positive':
            return 'Surgery, HER2-Targeted Therapy, Chemotherapy, and Radiation Therapy'
        return 'Surgery, Chemotherapy, and Radiation Therapy'

    if stage == 'III':
        plan = 'Neoadjuvant Chemotherapy, Surgery, Radiation Therapy'
        if her2 == 'Positive':
            plan += ', HER2-Targeted Therapy'
        if er == 'Positive':
            plan += ', Hormone Therapy'
        return plan

    # Any other stage value (in practice 'IV').
    if health != 'Good':
        return 'Palliative Care Only'
    options = []
    if er == 'Positive':
        options.append('Hormone Therapy')
    if her2 == 'Positive':
        options.append('HER2-Targeted Therapy')
    if tnbc == 'Positive':
        options.append('Chemotherapy')
    return 'Systemic Therapy (' + ', '.join(options) + '), Palliative Care'


def generate_realistic_data(num_patients=100):
    """Generate a table of synthetic breast cancer patients.

    Each patient is sampled independently. All randomness comes from
    NumPy's global ``np.random`` state, so call ``np.random.seed(...)``
    before this function to get a repeatable table.

    Args:
        num_patients: Number of rows to generate. Patient IDs run from 1 to
            ``num_patients``; 0 yields an empty table that still has every
            column.

    Returns:
        A ``pandas.DataFrame`` with one row per patient and the columns
        'Patient ID', 'Age', 'Menopausal Status', 'Tumor Size (cm)',
        'Lymph Node Involvement', 'Tumor Grade', 'Tumor Stage', 'ER Status',
        'PR Status', 'HER2 Status', 'Ki-67 Level', 'TNBC Status',
        'BRCA Mutation', 'Overall Health', 'Genomic Recurrence Score' and
        'Treatment', in that order.
    """
    records = []
    for patient_id in range(1, num_patients + 1):
        # Age: normal(60, 10) truncated to an int, then clamped to [30, 80].
        age = max(30, min(int(np.random.normal(60, 10)), 80))

        # Menopausal status is derived from age alone.
        menopausal = 'Post-menopausal' if age >= 50 else 'Pre-menopausal'

        # Tumor size in cm: log-normal, rounded to two decimals.
        tumor_size = round(np.random.lognormal(mean=0.7, sigma=0.5), 2)
        large_tumor = tumor_size > 2.0

        # Lymph node involvement is more likely with a larger tumor.
        node_probability = 0.6 if large_tumor else 0.3
        lymph_node = 'Positive' if np.random.rand() < node_probability else 'Negative'

        # Grade (1-3): larger tumors skew towards higher grades.
        grade_weights = [0.1, 0.4, 0.5] if large_tumor else [0.3, 0.5, 0.2]
        grade = int(np.random.choice([1, 2, 3], p=grade_weights))

        # Stage from size and nodes; 5% of patients are overridden to stage IV.
        stage = _assign_stage(tumor_size, lymph_node)
        if np.random.rand() < 0.05:
            stage = 'IV'

        # Hormone receptors: PR can only be positive when ER is positive.
        er = str(np.random.choice(['Positive', 'Negative'], p=[0.75, 0.25]))
        pr = 'Positive' if er == 'Positive' and np.random.rand() > 0.1 else 'Negative'

        # HER2 positivity is more likely in grade 3 tumors.
        her2_weights = [0.3, 0.7] if grade == 3 else [0.15, 0.85]
        her2 = str(np.random.choice(['Positive', 'Negative'], p=her2_weights))

        # Ki-67 is only ever 'High' for grade 3 tumors.
        ki67 = 'High' if grade == 3 and np.random.rand() < 0.8 else 'Low'

        # Triple-negative: ER, PR and HER2 all negative.
        tnbc = (
            'Positive'
            if er == 'Negative' and pr == 'Negative' and her2 == 'Negative'
            else 'Negative'
        )

        # BRCA mutation: a 20% draw among TNBC or under-40 patients. The
        # risk group is evaluated first so that the random draw applies to
        # TNBC patients too; without the grouping, `and` binds tighter than
        # `or` and every TNBC patient would be marked positive.
        brca_risk_group = tnbc == 'Positive' or age < 40
        brca = 'Positive' if brca_risk_group and np.random.rand() < 0.2 else 'Negative'

        # Overall health: only patients under 65 can be 'Good' (90% of them).
        health = 'Good' if age < 65 and np.random.rand() < 0.9 else 'Poor'

        # Genomic recurrence score is only drawn for ER+, HER2- patients.
        if er == 'Positive' and her2 == 'Negative':
            recurrence_score = str(
                np.random.choice(['Low', 'Intermediate', 'High'], p=[0.6, 0.3, 0.1])
            )
        else:
            recurrence_score = 'N/A'

        treat = _recommend_treatment(stage, er, her2, tnbc, brca, recurrence_score, health)

        records.append({
            'Patient ID': patient_id,
            'Age': age,
            'Menopausal Status': menopausal,
            'Tumor Size (cm)': tumor_size,
            'Lymph Node Involvement': lymph_node,
            'Tumor Grade': grade,
            'Tumor Stage': stage,
            'ER Status': er,
            'PR Status': pr,
            'HER2 Status': her2,
            'Ki-67 Level': ki67,
            'TNBC Status': tnbc,
            'BRCA Mutation': brca,
            'Overall Health': health,
            'Genomic Recurrence Score': recurrence_score,
            'Treatment': treat,
        })

    # Passing the column list keeps the schema (and order) even with no rows.
    return pd.DataFrame(records, columns=_PATIENT_COLUMNS)
def main():
    """Render the Streamlit page: choose a cohort size, generate, preview, download.

    The generated table is kept in ``st.session_state`` rather than in a
    local variable. Streamlit reruns the whole script on every widget
    interaction and ``st.button`` is only True on the run triggered by its
    own click, so rendering the results directly under the button would make
    the table and the download buttons vanish as soon as a download button
    is pressed.
    """
    st.title('Synthetic Breast Cancer Patient Data Generator')
    st.write('This app generates synthetic breast cancer patient data based on NCCN guidelines.')

    # User inputs
    num_patients = st.number_input(
        'Number of Patients to Generate',
        min_value=10,
        max_value=10000,
        value=100,
        step=10,
    )

    if st.button('Generate Data'):
        # Persist the result so it survives the reruns caused by later clicks.
        st.session_state['generated_patients'] = generate_realistic_data(num_patients=num_patients)

    # Nothing has been generated in this session yet: show only the inputs.
    if 'generated_patients' not in st.session_state:
        return

    df = st.session_state['generated_patients']
    # Report the size of the stored table, which may differ from the current
    # value of the number input if the user changed it after generating.
    st.success(f'Generated data for {len(df)} patients.')

    # Display DataFrame
    st.dataframe(df)

    # Download with the original 'Treatment' column name.
    csv_with_treatment = df.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Download data as CSV with Treatment",
        data=csv_with_treatment,
        file_name='synthetic_breast_cancer_data_with_treatment.csv',
        mime='text/csv',
    )

    # Download the same data with 'Treatment' renamed to 'CheckTreatment'.
    df_check_treatment = df.rename(columns={'Treatment': 'CheckTreatment'})
    csv_check_treatment = df_check_treatment.to_csv(index=False).encode('utf-8')
    st.download_button(
        label="Download data as CSV with CheckTreatment",
        data=csv_check_treatment,
        file_name='synthetic_breast_cancer_data_with_check_treatment.csv',
        mime='text/csv',
    )


# Build the page only when this file is executed as a script.
if __name__ == '__main__':
    main()