import streamlit as st import pandas as pd import numpy as np # Function to generate synthetic data def generate_synthetic_data(num_members): unique_ids = [f"MEM_{i:05d}" for i in range(1, num_members + 1)] primary_keys = [f"PPK_{i:05d}" for i in range(1, num_members + 1)] # Synthetic Enrollments enrollments_data = { "MEM_AGE": np.random.randint(18, 80, num_members), "MEM_MSA_NAME": np.random.choice(["DETROIT", "HONOLULU", "LOS ANGELES"], num_members), "MEM_STAT": np.random.choice(["ACTIVE", "INACTIVE"], num_members), "MEMBER_ID": unique_ids, "PRIMARY_PERSON_KEY": primary_keys, "PAYER_LOB": np.random.choice(["MEDICAID", "COMMERCIAL", "MEDICARE"], num_members), "PAYER_TYPE": np.random.choice(["PPO", "HMO"], num_members), "PRIMARY_CHRONIC_CONDITION_ROLLUP_DESC": np.random.choice(["Cancer", "Diabetes", "Hypertension"], num_members), "Count of PRIMARY_CHRONIC_CONDITION_ROLLUP_ID": np.random.randint(1, 5, num_members), "PROD_TYPE": np.random.choice(["DENTAL", "VISION", "MEDICAL"], num_members), "RELATION": np.random.choice(["SUBSCRIBER", "DEPENDENT"], num_members), "Sum of QTY_MM_DN": np.random.randint(0, 10, num_members), "Sum of QTY_MM_MD": np.random.randint(0, 10, num_members), "Sum of QTY_MM_RX": np.random.randint(0, 10, num_members), "Sum of QTY_MM_VS": np.random.randint(0, 10, num_members), "YEARMO": np.random.randint(202201, 202412, num_members), } enrollments_df = pd.DataFrame(enrollments_data) # Synthetic Members members_data = { "MEM_ETHNICITY": np.random.choice(["Hispanic", "Non-Hispanic", None], num_members), "MEM_GENDER": ["F"] * num_members, # Ensuring all members are female "MEM_MSA_NAME": enrollments_data["MEM_MSA_NAME"], "MEM_RACE": np.random.choice(["White", "Black", "Asian", None], num_members), "MEM_STATE": np.random.choice(["MI", "HI", "CA"], num_members), "MEM_ZIP3": np.random.randint(100, 999, num_members), "MEMBER_ID": unique_ids, "PRIMARY_PERSON_KEY": primary_keys, } members_df = pd.DataFrame(members_data) # Synthetic Providers providers_data = { "PROV_CLINIC_STATE": np.random.choice(["MI", "HI", "CA"], num_members), "PROV_CLINIC_ZIP": np.random.randint(10000, 99999, num_members), "PROV_KEY": [f"PK_{i:05d}" for i in range(1, num_members + 1)], "PROV_NPI_ORG": np.random.randint(1, 50, num_members), "PROV_TAXONOMY": np.random.choice(["208100000X", "207RE0101X"], num_members), "PROV_TYPE": np.random.choice(["Type1", "Type2"], num_members), } providers_df = pd.DataFrame(providers_data) # Synthetic Services services_data = { "MEMBER_ID": unique_ids, "PRIMARY_PERSON_KEY": primary_keys, "Sum of AMT_ALLOWED": np.random.uniform(1000, 10000, num_members), "Sum of AMT_BILLED": np.random.uniform(1000, 15000, num_members), "Count of AMT_PAID": np.random.randint(1, 5, num_members), "ATT_PROV_KEY": np.random.choice(providers_data["PROV_KEY"], num_members), "BILL_PROV_KEY": np.random.choice(providers_data["PROV_KEY"], num_members), "CLAIM_IN_NETWORK": np.random.choice(["Y", "N", None], num_members), "RELATION": enrollments_data["RELATION"], "SERVICE_SETTING": np.random.choice(["OUTPATIENT", "INPATIENT"], num_members), "Sum of SERVICE_LINE": np.random.randint(1, 10, num_members), "Sum of SV_UNITS": np.random.randint(1, 100, num_members), "YEARMO": enrollments_data["YEARMO"], } services_df = pd.DataFrame(services_data) return enrollments_df, members_df, providers_df, services_df # Streamlit App st.title("Synthetic Medical Billing Data Generator") # Slider for number of members num_members = st.slider("Select number of unique members:", min_value=10, max_value=1000, step=10, value=100) # Generate synthetic data enrollments_df, members_df, providers_df, services_df = generate_synthetic_data(num_members) # Display dataframes st.subheader("Preview of Generated Data") st.write("Enrollments Data") st.dataframe(enrollments_df.head()) st.write("Members Data") st.dataframe(members_df.head()) st.write("Providers Data") st.dataframe(providers_df.head()) st.write("Services Data") st.dataframe(services_df.head()) # Allow downloading the generated files st.subheader("Download Synthetic Data") st.download_button( label="Download Enrollments Data", data=enrollments_df.to_csv(index=False), file_name="Synthetic_Enrollments.csv", mime="text/csv", ) st.download_button( label="Download Members Data", data=members_df.to_csv(index=False), file_name="Synthetic_Members.csv", mime="text/csv", ) st.download_button( label="Download Providers Data", data=providers_df.to_csv(index=False), file_name="Synthetic_Providers.csv", mime="text/csv", ) st.download_button( label="Download Services Data", data=services_df.to_csv(index=False), file_name="Synthetic_Services.csv", mime="text/csv", )