"""Streamlit app that generates linked synthetic breast-cancer datasets.

Three tables are produced: per-patient clinical data, a members table keyed
by the same patient IDs, and a services table whose rows reference those IDs.
All values are randomly simulated; none of the rules here are clinical advice.
"""

import streamlit as st
import pandas as pd
import numpy as np

# Seed for reproducibility
# NOTE: Streamlit re-executes this script on every interaction, so the global
# NumPy RNG is reseeded before each generation; identical slider values
# therefore always yield identical data.
np.random.seed(42)

# Column order of the breast-cancer table. Passed explicitly to the DataFrame
# constructor so the columns exist even when zero patients are requested.
_BREAST_CANCER_COLUMNS = [
    "Patient ID",
    "Age",
    "Menopausal Status",
    "Tumor Size (cm)",
    "Lymph Node Involvement",
    "Tumor Grade",
    "Tumor Stage",
    "ER Status",
    "PR Status",
    "HER2 Status",
    "Ki-67 Level",
    "TNBC Status",
    "BRCA Mutation",
    "Overall Health",
    "Genomic Recurrence Score",
    "Treatment",
]

# Key under which the generated tables are kept in st.session_state.
_SESSION_KEY = "generated_tables"


def _assign_stage(tumor_size, lymph_node):
    """Return stage "I", "II" or "III" from tumor size (cm) and node status."""
    if lymph_node == "Negative":
        if tumor_size <= 2.0:
            return "I"
        if tumor_size <= 5.0:
            return "II"
    # Node-positive disease, or a node-negative tumor larger than 5 cm.
    return "III"


def _select_treatment(stage, er, her2, tnbc, recurrence_score):
    """Return the treatment description for one simulated patient."""
    if stage in ("I", "II"):
        if tnbc == "Positive":
            return "Surgery, Chemotherapy, and Radiation Therapy"
        if er == "Positive" and recurrence_score != "N/A":
            if recurrence_score == "High":
                return "Surgery, Chemotherapy, Hormone Therapy, and Radiation Therapy"
            if recurrence_score == "Intermediate":
                return "Surgery, Consider Chemotherapy, Hormone Therapy, and Radiation Therapy"
            return "Surgery, Hormone Therapy, and Radiation Therapy"
        if her2 == "Positive":
            return "Surgery, HER2-Targeted Therapy, Chemotherapy, and Radiation Therapy"
        return "Surgery, Chemotherapy, and Radiation Therapy"

    if stage == "III":
        return (
            "Neoadjuvant Chemotherapy, Surgery, Radiation Therapy"
            + (", HER2-Targeted Therapy" if her2 == "Positive" else "")
            + (", Hormone Therapy" if er == "Positive" else "")
        )

    # Only stage "IV" reaches this point.
    return "Systemic Therapy (Palliative Care)"


def _simulate_patient(patient_id):
    """Draw one synthetic patient record as a dict keyed by column name.

    The sequence of random draws (including the ones skipped by short-circuit
    evaluation) is significant: changing it changes the data produced for a
    given seed.
    """
    # Age ~ N(60, 10), truncated to an integer and clamped to [30, 80].
    age = int(np.random.normal(60, 10))
    age = max(30, min(age, 80))
    menopausal = "Post-menopausal" if age >= 50 else "Pre-menopausal"

    tumor_size = round(np.random.lognormal(mean=0.7, sigma=0.5), 2)

    # Exactly one draw: tumors above 2 cm are node-positive more often.
    positive_node_prob = 0.6 if tumor_size > 2.0 else 0.3
    lymph_node = "Positive" if np.random.rand() < positive_node_prob else "Negative"

    grade_probs = [0.1, 0.4, 0.5] if tumor_size > 2.0 else [0.3, 0.5, 0.2]
    grade = np.random.choice([1, 2, 3], p=grade_probs)

    stage = _assign_stage(tumor_size, lymph_node)
    # A flat 5% of patients are overridden to stage IV.
    if np.random.rand() < 0.05:
        stage = "IV"

    er = np.random.choice(["Positive", "Negative"], p=[0.75, 0.25])
    # PR can only be positive when ER is positive (no draw otherwise).
    pr = "Positive" if er == "Positive" and np.random.rand() > 0.1 else "Negative"

    her2_probs = [0.3, 0.7] if grade == 3 else [0.15, 0.85]
    her2 = np.random.choice(["Positive", "Negative"], p=her2_probs)

    ki67 = "High" if grade == 3 and np.random.rand() < 0.8 else "Low"

    is_triple_negative = er == "Negative" and pr == "Negative" and her2 == "Negative"
    tnbc = "Positive" if is_triple_negative else "Negative"

    brca_eligible = tnbc == "Positive" or age < 40
    brca = "Positive" if brca_eligible and np.random.rand() < 0.2 else "Negative"

    health = "Good" if age < 65 and np.random.rand() < 0.9 else "Poor"

    # A recurrence score is only drawn for ER-positive / HER2-negative patients.
    if er == "Positive" and her2 == "Negative":
        recurrence_score = np.random.choice(
            ["Low", "Intermediate", "High"], p=[0.6, 0.3, 0.1]
        )
    else:
        recurrence_score = "N/A"

    return {
        "Patient ID": patient_id,
        "Age": age,
        "Menopausal Status": menopausal,
        "Tumor Size (cm)": tumor_size,
        "Lymph Node Involvement": lymph_node,
        "Tumor Grade": grade,
        "Tumor Stage": stage,
        "ER Status": er,
        "PR Status": pr,
        "HER2 Status": her2,
        "Ki-67 Level": ki67,
        "TNBC Status": tnbc,
        "BRCA Mutation": brca,
        "Overall Health": health,
        "Genomic Recurrence Score": recurrence_score,
        "Treatment": _select_treatment(stage, er, her2, tnbc, recurrence_score),
    }


# Function to generate synthetic BreastCancer data
def generate_breast_cancer_data(num_patients: int) -> pd.DataFrame:
    """Generate a table of synthetic breast-cancer patients.

    Args:
        num_patients: Number of rows to generate.

    Returns:
        A DataFrame with one row per patient and the columns listed in
        ``_BREAST_CANCER_COLUMNS``. Patient IDs are ``PPK_00001``,
        ``PPK_00002``, ... in row order.
    """
    records = [
        _simulate_patient(f"PPK_{index + 1:05d}") for index in range(num_patients)
    ]
    return pd.DataFrame(records, columns=_BREAST_CANCER_COLUMNS)


# Function to generate synthetic Members
def generate_members_from_breast_cancer(breast_cancer_df: pd.DataFrame) -> pd.DataFrame:
    """Build a members table with one row per patient in *breast_cancer_df*.

    Args:
        breast_cancer_df: Table containing a ``"Patient ID"`` column.

    Returns:
        A DataFrame whose ``MEMBER_ID`` and ``PRIMARY_PERSON_KEY`` both equal
        the patient ID, with gender fixed to ``"F"`` and randomly drawn
        ethnicity, race, state and ZIP3 values. Ethnicity and race may be
        ``None``.
    """
    member_count = len(breast_cancer_df)
    patient_ids = breast_cancer_df["Patient ID"]
    members_data = {
        "MEMBER_ID": patient_ids,
        "PRIMARY_PERSON_KEY": patient_ids,
        "MEM_GENDER": ["F"] * member_count,
        "MEM_ETHNICITY": np.random.choice(["Hispanic", "Non-Hispanic", None], member_count),
        "MEM_RACE": np.random.choice(["White", "Black", "Asian", None], member_count),
        "MEM_STATE": np.random.choice(["MI", "HI", "CA"], member_count),
        # randint's upper bound is exclusive, so values fall in 100..998.
        "MEM_ZIP3": np.random.randint(100, 999, member_count),
    }
    return pd.DataFrame(members_data)


# Function to generate synthetic Services
def generate_services(num_services: int, primary_keys) -> pd.DataFrame:
    """Generate synthetic service rows that reference existing patients.

    Args:
        num_services: Number of service rows to generate.
        primary_keys: Sequence of patient IDs to sample from (with replacement).

    Returns:
        A DataFrame with a sampled ``PRIMARY_PERSON_KEY`` plus random allowed
        and billed amounts, a paid count and a service setting per row.
    """
    services_data = {
        "PRIMARY_PERSON_KEY": np.random.choice(primary_keys, num_services),
        "Sum of AMT_ALLOWED": np.random.uniform(1000, 10000, num_services),
        "Sum of AMT_BILLED": np.random.uniform(1000, 15000, num_services),
        # randint's upper bound is exclusive, so counts fall in 1..4.
        "Count of AMT_PAID": np.random.randint(1, 5, num_services),
        "SERVICE_SETTING": np.random.choice(["OUTPATIENT", "INPATIENT"], num_services),
    }
    return pd.DataFrame(services_data)


# Main Streamlit App
st.title("Synthetic Medical Data Generator")

# Slider for breast cancer patients
num_patients = st.slider("Number of Breast Cancer Patients to Generate", 10, 1000, 100)
num_services = st.slider("Number of Services to Generate", 10, 2000, 500)

if st.button("Generate Data"):
    breast_cancer_df = generate_breast_cancer_data(num_patients)
    members_df = generate_members_from_breast_cancer(breast_cancer_df)
    services_df = generate_services(num_services, breast_cancer_df["Patient ID"].tolist())
    # st.button is True only for the rerun caused by its own click. Clicking a
    # download button triggers another rerun, so the tables are kept in session
    # state; otherwise they would disappear after the first download.
    st.session_state[_SESSION_KEY] = {
        "breast_cancer": breast_cancer_df,
        "members": members_df,
        "services": services_df,
    }

# Display and download data
if _SESSION_KEY in st.session_state:
    tables = st.session_state[_SESSION_KEY]

    st.subheader("Breast Cancer Data")
    st.dataframe(tables["breast_cancer"].head())
    st.download_button(
        "Download Breast Cancer Data",
        tables["breast_cancer"].to_csv(index=False),
        "breast_cancer.csv",
    )

    st.subheader("Members Data")
    st.dataframe(tables["members"].head())
    st.download_button(
        "Download Members",
        tables["members"].to_csv(index=False),
        "members.csv",
    )

    st.subheader("Services Data")
    st.dataframe(tables["services"].head())
    st.download_button(
        "Download Services",
        tables["services"].to_csv(index=False),
        "services.csv",
    )