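# Lokahi_data / app.py
# Page-header residue captured from the repository file viewer (not part of the program):
# eaglelandsonce's picture | Update app.py | 2d31353 verified | raw | history blame | 7.03 kB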
import streamlit as st
import pandas as pd
import numpy as np
# Seed NumPy's global random state at the top of the script so that the
# generator functions below, which all draw from np.random, produce the same
# synthetic data on every run.
np.random.seed(42)
# Function to generate synthetic BreastCancer data
def _stage_from_tumor(tumor_size, lymph_node):
    """Return the base stage ("I", "II" or "III") from tumor size and nodal status."""
    # Node-positive disease or a tumor larger than 5 cm is stage III.
    if lymph_node == "Positive" or tumor_size > 5.0:
        return "III"
    # Node-negative from here on: up to 2 cm is stage I, 2-5 cm is stage II.
    return "I" if tumor_size <= 2.0 else "II"


def _recommend_treatment(stage, er, her2, tnbc, recurrence_score):
    """Return the treatment plan string for one synthetic patient."""
    if stage in ("I", "II"):
        if tnbc == "Positive":
            return "Surgery, Chemotherapy, and Radiation Therapy"
        if er == "Positive" and recurrence_score != "N/A":
            if recurrence_score == "High":
                return "Surgery, Chemotherapy, Hormone Therapy, and Radiation Therapy"
            if recurrence_score == "Intermediate":
                return "Surgery, Consider Chemotherapy, Hormone Therapy, and Radiation Therapy"
            return "Surgery, Hormone Therapy, and Radiation Therapy"
        if her2 == "Positive":
            return "Surgery, HER2-Targeted Therapy, Chemotherapy, and Radiation Therapy"
        return "Surgery, Chemotherapy, and Radiation Therapy"
    if stage == "III":
        return (
            "Neoadjuvant Chemotherapy, Surgery, Radiation Therapy"
            + (", HER2-Targeted Therapy" if her2 == "Positive" else "")
            + (", Hormone Therapy" if er == "Positive" else "")
        )
    # Only stage "IV" reaches this point.
    return "Systemic Therapy (Palliative Care)"


def generate_breast_cancer_data(num_patients: int) -> pd.DataFrame:
    """Generate a synthetic breast-cancer patient table.

    All randomness comes from NumPy's global ``np.random`` state, so seeding
    it before the call makes the output repeatable. The random draws are made
    in a fixed order for each patient; some are skipped when their outcome is
    already decided (for example PR status when ER is negative).

    Args:
        num_patients: Number of rows to generate. Zero or a negative value
            yields an empty frame that still has every column.

    Returns:
        A DataFrame with one row per patient and the columns "Patient ID",
        "Age", "Menopausal Status", "Tumor Size (cm)",
        "Lymph Node Involvement", "Tumor Grade", "Tumor Stage", "ER Status",
        "PR Status", "HER2 Status", "Ki-67 Level", "TNBC Status",
        "BRCA Mutation", "Overall Health", "Genomic Recurrence Score" and
        "Treatment".
    """
    column_names = (
        "Patient ID",
        "Age",
        "Menopausal Status",
        "Tumor Size (cm)",
        "Lymph Node Involvement",
        "Tumor Grade",
        "Tumor Stage",
        "ER Status",
        "PR Status",
        "HER2 Status",
        "Ki-67 Level",
        "TNBC Status",
        "BRCA Mutation",
        "Overall Health",
        "Genomic Recurrence Score",
        "Treatment",
    )

    rows = []
    for index in range(num_patients):
        # Age: normal(60, 10), truncated to an int and clamped to 30..80.
        age = int(np.random.normal(60, 10))
        age = max(30, min(age, 80))
        menopausal = "Post-menopausal" if age >= 50 else "Pre-menopausal"

        tumor_size = round(np.random.lognormal(mean=0.7, sigma=0.5), 2)

        # Exactly one uniform draw per patient; larger tumors are more
        # likely to be node-positive.
        node_positive_rate = 0.6 if tumor_size > 2.0 else 0.3
        lymph_node = "Positive" if np.random.rand() < node_positive_rate else "Negative"

        grade_weights = [0.1, 0.4, 0.5] if tumor_size > 2.0 else [0.3, 0.5, 0.2]
        grade = np.random.choice([1, 2, 3], p=grade_weights)

        stage = _stage_from_tumor(tumor_size, lymph_node)
        # 5% of patients are overridden to stage IV regardless of size/nodes.
        if np.random.rand() < 0.05:
            stage = "IV"

        er = np.random.choice(["Positive", "Negative"], p=[0.75, 0.25])
        # PR can only be positive when ER is positive; the draw is skipped otherwise.
        pr = "Positive" if er == "Positive" and np.random.rand() > 0.1 else "Negative"

        her2_weights = [0.3, 0.7] if grade == 3 else [0.15, 0.85]
        her2 = np.random.choice(["Positive", "Negative"], p=her2_weights)

        ki67 = "High" if grade == 3 and np.random.rand() < 0.8 else "Low"

        # Triple-negative: ER, PR and HER2 all negative.
        all_negative = er == "Negative" and pr == "Negative" and her2 == "Negative"
        tnbc = "Positive" if all_negative else "Negative"

        brca_eligible = tnbc == "Positive" or age < 40
        brca = "Positive" if brca_eligible and np.random.rand() < 0.2 else "Negative"

        health = "Good" if age < 65 and np.random.rand() < 0.9 else "Poor"

        # A recurrence score is only assigned to ER-positive, HER2-negative patients.
        if er == "Positive" and her2 == "Negative":
            recurrence_score = np.random.choice(
                ["Low", "Intermediate", "High"], p=[0.6, 0.3, 0.1]
            )
        else:
            recurrence_score = "N/A"

        rows.append(
            {
                "Patient ID": f"PPK_{index + 1:05d}",
                "Age": age,
                "Menopausal Status": menopausal,
                "Tumor Size (cm)": tumor_size,
                "Lymph Node Involvement": lymph_node,
                "Tumor Grade": grade,
                "Tumor Stage": stage,
                "ER Status": er,
                "PR Status": pr,
                "HER2 Status": her2,
                "Ki-67 Level": ki67,
                "TNBC Status": tnbc,
                "BRCA Mutation": brca,
                "Overall Health": health,
                "Genomic Recurrence Score": recurrence_score,
                "Treatment": _recommend_treatment(stage, er, her2, tnbc, recurrence_score),
            }
        )

    # Build column-wise so an empty result still carries every column.
    return pd.DataFrame({name: [row[name] for row in rows] for name in column_names})
# Function to generate synthetic Members
def generate_members_from_breast_cancer(breast_cancer_df):
    """Derive a synthetic member table with one row per breast-cancer patient.

    The patient identifier is reused for both MEMBER_ID and
    PRIMARY_PERSON_KEY, gender is always "F", and the remaining demographic
    columns are sampled from NumPy's global ``np.random`` state.

    Args:
        breast_cancer_df: DataFrame containing a "Patient ID" column.

    Returns:
        A DataFrame with the columns MEMBER_ID, PRIMARY_PERSON_KEY,
        MEM_GENDER, MEM_ETHNICITY, MEM_RACE, MEM_STATE and MEM_ZIP3.
    """
    member_count = len(breast_cancer_df)
    patient_ids = breast_cancer_df["Patient ID"]

    # Sample in column order (ethnicity, race, state, ZIP3) so that a seeded
    # run consumes the random stream exactly as before.
    ethnicity = np.random.choice(["Hispanic", "Non-Hispanic", None], member_count)
    race = np.random.choice(["White", "Black", "Asian", None], member_count)
    state = np.random.choice(["MI", "HI", "CA"], member_count)
    # randint's upper bound is exclusive, so values fall in 100..998.
    zip3 = np.random.randint(100, 999, member_count)

    columns = {}
    columns["MEMBER_ID"] = patient_ids
    columns["PRIMARY_PERSON_KEY"] = patient_ids
    columns["MEM_GENDER"] = ["F" for _ in range(member_count)]
    columns["MEM_ETHNICITY"] = ethnicity
    columns["MEM_RACE"] = race
    columns["MEM_STATE"] = state
    columns["MEM_ZIP3"] = zip3
    return pd.DataFrame(columns)
# Function to generate synthetic Services
def generate_services(num_services, primary_keys):
    """Create a synthetic service table whose rows reference existing patients.

    Every value is sampled from NumPy's global ``np.random`` state.

    Args:
        num_services: Number of service rows to generate.
        primary_keys: Sequence of patient keys to sample from (with
            replacement) for the PRIMARY_PERSON_KEY column.

    Returns:
        A DataFrame with the columns PRIMARY_PERSON_KEY,
        "Sum of AMT_ALLOWED", "Sum of AMT_BILLED", "Count of AMT_PAID" and
        SERVICE_SETTING.
    """
    # Draw in column order so a seeded run consumes the random stream
    # exactly as before.
    person_keys = np.random.choice(primary_keys, num_services)
    allowed_amounts = np.random.uniform(1000, 10000, num_services)
    billed_amounts = np.random.uniform(1000, 15000, num_services)
    # randint's upper bound is exclusive, so counts fall in 1..4.
    paid_counts = np.random.randint(1, 5, num_services)
    settings = np.random.choice(["OUTPATIENT", "INPATIENT"], num_services)

    return pd.DataFrame(
        {
            "PRIMARY_PERSON_KEY": person_keys,
            "Sum of AMT_ALLOWED": allowed_amounts,
            "Sum of AMT_BILLED": billed_amounts,
            "Count of AMT_PAID": paid_counts,
            "SERVICE_SETTING": settings,
        }
    )
# Main Streamlit App
st.title("Synthetic Medical Data Generator")

# Sliders controlling how many rows each generator produces.
num_patients = st.slider("Number of Breast Cancer Patients to Generate", 10, 1000, 100)
num_services = st.slider("Number of Services to Generate", 10, 2000, 500)

# st.button is True only on the rerun triggered by its own click. Any later
# interaction (including pressing a download button) reruns the script with
# the button False, so the generated tables are kept in session state and
# rendered from there instead of from inside the button branch.
if st.button("Generate Data"):
    breast_cancer_df = generate_breast_cancer_data(num_patients)
    members_df = generate_members_from_breast_cancer(breast_cancer_df)
    services_df = generate_services(num_services, breast_cancer_df["Patient ID"].tolist())
    st.session_state["generated_tables"] = (breast_cancer_df, members_df, services_df)

# Display and download whatever was generated most recently in this session.
if "generated_tables" in st.session_state:
    breast_cancer_df, members_df, services_df = st.session_state["generated_tables"]
    sections = (
        ("Breast Cancer Data", breast_cancer_df, "Download Breast Cancer Data", "breast_cancer.csv"),
        ("Members Data", members_df, "Download Members", "members.csv"),
        ("Services Data", services_df, "Download Services", "services.csv"),
    )
    for heading, table, button_label, file_name in sections:
        st.subheader(heading)
        # Only a preview is shown; the download contains the full table.
        st.dataframe(table.head())
        st.download_button(button_label, table.to_csv(index=False), file_name, mime="text/csv")