Spaces:
Sleeping
Sleeping
File size: 5,143 Bytes
893ed02 1c0995c 893ed02 1c0995c 893ed02 1c0995c 893ed02 1c0995c 893ed02 1c0995c 893ed02 1c0995c 893ed02 1c0995c 893ed02 c069834 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
import streamlit as st
import pandas as pd
import numpy as np
# Function to generate synthetic data
def generate_synthetic_data(num_members, seed=None):
    """Generate four related synthetic medical-billing tables.

    Each table has exactly ``num_members`` rows. Enrollments, members and
    services share the same ``MEMBER_ID`` / ``PRIMARY_PERSON_KEY`` values
    row for row; services additionally reuses the enrollments ``RELATION``
    and ``YEARMO`` columns, and members reuses ``MEM_MSA_NAME``. Services
    reference providers through keys sampled from the providers table.

    Args:
        num_members: Number of rows (unique members) to generate.
        seed: Optional seed for reproducible output. When ``None`` (the
            default) values are drawn from NumPy's global random state, so
            a prior ``np.random.seed(...)`` call is still honoured.

    Returns:
        A tuple ``(enrollments_df, members_df, providers_df, services_df)``
        of pandas DataFrames.
    """
    # `np.random` (module) and `RandomState` expose the same
    # randint/choice/uniform functions, so either can serve as the source.
    rng = np.random if seed is None else np.random.RandomState(seed)

    unique_ids = [f"MEM_{i:05d}" for i in range(1, num_members + 1)]
    primary_keys = [f"PPK_{i:05d}" for i in range(1, num_members + 1)]

    # Only real calendar months: a plain randint over 202201..202412 would
    # also produce impossible values such as 202213 or 202300.
    valid_yearmos = [
        year * 100 + month
        for year in (2022, 2023, 2024)
        for month in range(1, 13)
    ]

    # Synthetic Enrollments
    enrollments_data = {
        "MEM_AGE": rng.randint(18, 80, num_members),
        "MEM_MSA_NAME": rng.choice(["DETROIT", "HONOLULU", "LOS ANGELES"], num_members),
        "MEM_STAT": rng.choice(["ACTIVE", "INACTIVE"], num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "PAYER_LOB": rng.choice(["MEDICAID", "COMMERCIAL", "MEDICARE"], num_members),
        "PAYER_TYPE": rng.choice(["PPO", "HMO"], num_members),
        "PRIMARY_CHRONIC_CONDITION_ROLLUP_DESC": rng.choice(
            ["Cancer", "Diabetes", "Hypertension"], num_members
        ),
        "Count of PRIMARY_CHRONIC_CONDITION_ROLLUP_ID": rng.randint(1, 5, num_members),
        "PROD_TYPE": rng.choice(["DENTAL", "VISION", "MEDICAL"], num_members),
        "RELATION": rng.choice(["SUBSCRIBER", "DEPENDENT"], num_members),
        "Sum of QTY_MM_DN": rng.randint(0, 10, num_members),
        "Sum of QTY_MM_MD": rng.randint(0, 10, num_members),
        "Sum of QTY_MM_RX": rng.randint(0, 10, num_members),
        "Sum of QTY_MM_VS": rng.randint(0, 10, num_members),
        "YEARMO": rng.choice(valid_yearmos, num_members),
    }
    enrollments_df = pd.DataFrame(enrollments_data)

    # Synthetic Members
    members_data = {
        "MEM_ETHNICITY": rng.choice(["Hispanic", "Non-Hispanic", None], num_members),
        "MEM_GENDER": ["F"] * num_members,  # Ensuring all members are female
        "MEM_MSA_NAME": enrollments_data["MEM_MSA_NAME"],
        "MEM_RACE": rng.choice(["White", "Black", "Asian", None], num_members),
        "MEM_STATE": rng.choice(["MI", "HI", "CA"], num_members),
        "MEM_ZIP3": rng.randint(100, 999, num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
    }
    members_df = pd.DataFrame(members_data)

    # Synthetic Providers
    providers_data = {
        "PROV_CLINIC_STATE": rng.choice(["MI", "HI", "CA"], num_members),
        "PROV_CLINIC_ZIP": rng.randint(10000, 99999, num_members),
        "PROV_KEY": [f"PK_{i:05d}" for i in range(1, num_members + 1)],
        "PROV_NPI_ORG": rng.randint(1, 50, num_members),
        "PROV_TAXONOMY": rng.choice(["208100000X", "207RE0101X"], num_members),
        "PROV_TYPE": rng.choice(["Type1", "Type2"], num_members),
    }
    providers_df = pd.DataFrame(providers_data)

    # Synthetic Services
    services_data = {
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "Sum of AMT_ALLOWED": rng.uniform(1000, 10000, num_members),
        "Sum of AMT_BILLED": rng.uniform(1000, 15000, num_members),
        "Count of AMT_PAID": rng.randint(1, 5, num_members),
        # Provider keys are sampled from the providers table so every
        # service row points at an existing PROV_KEY.
        "ATT_PROV_KEY": rng.choice(providers_data["PROV_KEY"], num_members),
        "BILL_PROV_KEY": rng.choice(providers_data["PROV_KEY"], num_members),
        "CLAIM_IN_NETWORK": rng.choice(["Y", "N", None], num_members),
        "RELATION": enrollments_data["RELATION"],
        "SERVICE_SETTING": rng.choice(["OUTPATIENT", "INPATIENT"], num_members),
        "Sum of SERVICE_LINE": rng.randint(1, 10, num_members),
        "Sum of SV_UNITS": rng.randint(1, 100, num_members),
        "YEARMO": enrollments_data["YEARMO"],
    }
    services_df = pd.DataFrame(services_data)

    return enrollments_df, members_df, providers_df, services_df
# Streamlit App
st.title("Synthetic Medical Billing Data Generator")

# Slider for number of members
num_members = st.slider(
    "Select number of unique members:",
    min_value=10,
    max_value=1000,
    step=10,
    value=100,
)

# Streamlit re-executes this whole script on every widget interaction,
# including a click on a download button. Generating fresh random data on
# each rerun would make the four downloaded files come from different draws
# (so Services RELATION/YEARMO would no longer match Enrollments) and would
# make later downloads differ from the preview. Keep one generated set in
# session state and rebuild it only when the member count changes or the
# user explicitly asks for new data.
regenerate = st.button("Regenerate data")
cache_is_stale = (
    "synthetic_data" not in st.session_state
    or st.session_state["synthetic_data"]["num_members"] != num_members
)
if regenerate or cache_is_stale:
    st.session_state["synthetic_data"] = {
        "num_members": num_members,
        "frames": generate_synthetic_data(num_members),
    }

enrollments_df, members_df, providers_df, services_df = st.session_state[
    "synthetic_data"
]["frames"]

# (display name, dataframe) pairs drive both the preview and the downloads
# so labels and file names stay in sync.
tables = [
    ("Enrollments", enrollments_df),
    ("Members", members_df),
    ("Providers", providers_df),
    ("Services", services_df),
]

# Display dataframes
st.subheader("Preview of Generated Data")
for table_name, frame in tables:
    st.write(f"{table_name} Data")
    st.dataframe(frame.head())

# Allow downloading the generated files
st.subheader("Download Synthetic Data")
for table_name, frame in tables:
    st.download_button(
        label=f"Download {table_name} Data",
        data=frame.to_csv(index=False),
        file_name=f"Synthetic_{table_name}.csv",
        mime="text/csv",
    )
|