File size: 5,143 Bytes
893ed02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1c0995c
893ed02
 
1c0995c
 
 
 
 
893ed02
 
 
 
 
 
1c0995c
893ed02
1c0995c
893ed02
 
 
 
 
 
 
 
 
 
 
 
1c0995c
893ed02
 
 
 
 
 
 
 
 
 
 
1c0995c
 
 
 
893ed02
 
1c0995c
 
 
893ed02
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c069834
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import streamlit as st
import pandas as pd
import numpy as np

# Function to generate synthetic data
def generate_synthetic_data(num_members, seed=None):
    """Build four linked synthetic medical-billing tables.

    The enrollments, members and services tables each have one row per
    member and share the same ``MEMBER_ID`` / ``PRIMARY_PERSON_KEY`` values
    row for row. The members table reuses the enrollments ``MEM_MSA_NAME``
    column, and the services table reuses the enrollments ``RELATION`` and
    ``YEARMO`` columns. The providers table also has ``num_members`` rows;
    the services ``ATT_PROV_KEY`` / ``BILL_PROV_KEY`` values are sampled from
    its ``PROV_KEY`` column.

    Args:
        num_members: Number of rows to generate in every table.
        seed: Optional seed passed to ``numpy.random.default_rng``. The same
            seed and ``num_members`` reproduce the same tables; ``None``
            (the default) gives fresh random data on every call.

    Returns:
        A tuple ``(enrollments_df, members_df, providers_df, services_df)``
        of pandas DataFrames.
    """
    rng = np.random.default_rng(seed)

    unique_ids = [f"MEM_{i:05d}" for i in range(1, num_members + 1)]
    primary_keys = [f"PPK_{i:05d}" for i in range(1, num_members + 1)]

    # YEARMO is a YYYYMM code, so it must be sampled from real year-months.
    # Drawing a plain integer between 202201 and 202412 would mostly yield
    # impossible months such as 202213 or 202300.
    valid_yearmo = [year * 100 + month for year in range(2022, 2025) for month in range(1, 13)]

    # Synthetic Enrollments
    # Note: for every integer draw below the upper bound is exclusive.
    enrollments_data = {
        "MEM_AGE": rng.integers(18, 80, num_members),
        "MEM_MSA_NAME": rng.choice(["DETROIT", "HONOLULU", "LOS ANGELES"], num_members),
        "MEM_STAT": rng.choice(["ACTIVE", "INACTIVE"], num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "PAYER_LOB": rng.choice(["MEDICAID", "COMMERCIAL", "MEDICARE"], num_members),
        "PAYER_TYPE": rng.choice(["PPO", "HMO"], num_members),
        "PRIMARY_CHRONIC_CONDITION_ROLLUP_DESC": rng.choice(["Cancer", "Diabetes", "Hypertension"], num_members),
        "Count of PRIMARY_CHRONIC_CONDITION_ROLLUP_ID": rng.integers(1, 5, num_members),
        "PROD_TYPE": rng.choice(["DENTAL", "VISION", "MEDICAL"], num_members),
        "RELATION": rng.choice(["SUBSCRIBER", "DEPENDENT"], num_members),
        "Sum of QTY_MM_DN": rng.integers(0, 10, num_members),
        "Sum of QTY_MM_MD": rng.integers(0, 10, num_members),
        "Sum of QTY_MM_RX": rng.integers(0, 10, num_members),
        "Sum of QTY_MM_VS": rng.integers(0, 10, num_members),
        "YEARMO": rng.choice(valid_yearmo, num_members),
    }
    enrollments_df = pd.DataFrame(enrollments_data)

    # Synthetic Members
    # NOTE(review): MEM_STATE is drawn independently of MEM_MSA_NAME, so pairs
    # such as DETROIT / HI can occur - confirm whether that is intended.
    members_data = {
        "MEM_ETHNICITY": rng.choice(["Hispanic", "Non-Hispanic", None], num_members),
        "MEM_GENDER": ["F"] * num_members,  # Ensuring all members are female
        "MEM_MSA_NAME": enrollments_data["MEM_MSA_NAME"],
        "MEM_RACE": rng.choice(["White", "Black", "Asian", None], num_members),
        "MEM_STATE": rng.choice(["MI", "HI", "CA"], num_members),
        "MEM_ZIP3": rng.integers(100, 999, num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
    }
    members_df = pd.DataFrame(members_data)

    # Synthetic Providers
    providers_data = {
        "PROV_CLINIC_STATE": rng.choice(["MI", "HI", "CA"], num_members),
        "PROV_CLINIC_ZIP": rng.integers(10000, 99999, num_members),
        "PROV_KEY": [f"PK_{i:05d}" for i in range(1, num_members + 1)],
        "PROV_NPI_ORG": rng.integers(1, 50, num_members),
        "PROV_TAXONOMY": rng.choice(["208100000X", "207RE0101X"], num_members),
        "PROV_TYPE": rng.choice(["Type1", "Type2"], num_members),
    }
    providers_df = pd.DataFrame(providers_data)

    # Synthetic Services
    # Provider keys are sampled from the providers table so every service row
    # refers to a provider that exists.
    services_data = {
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "Sum of AMT_ALLOWED": rng.uniform(1000, 10000, num_members),
        "Sum of AMT_BILLED": rng.uniform(1000, 15000, num_members),
        "Count of AMT_PAID": rng.integers(1, 5, num_members),
        "ATT_PROV_KEY": rng.choice(providers_data["PROV_KEY"], num_members),
        "BILL_PROV_KEY": rng.choice(providers_data["PROV_KEY"], num_members),
        "CLAIM_IN_NETWORK": rng.choice(["Y", "N", None], num_members),
        "RELATION": enrollments_data["RELATION"],
        "SERVICE_SETTING": rng.choice(["OUTPATIENT", "INPATIENT"], num_members),
        "Sum of SERVICE_LINE": rng.integers(1, 10, num_members),
        "Sum of SV_UNITS": rng.integers(1, 100, num_members),
        "YEARMO": enrollments_data["YEARMO"],
    }
    services_df = pd.DataFrame(services_data)

    return enrollments_df, members_df, providers_df, services_df


# Streamlit App
st.title("Synthetic Medical Billing Data Generator")

# Slider for number of members
num_members = st.slider("Select number of unique members:", min_value=10, max_value=1000, step=10, value=100)

# Streamlit reruns this whole script on every widget interaction, including a
# click on a download button. Regenerating on each rerun would mean the four
# downloaded CSVs come from different random datasets and no longer agree on
# their shared columns. The generated tables are therefore kept in session
# state and rebuilt only when the requested size changes or the user asks.
regenerate = st.button("Regenerate data")
is_stale = (
    "synthetic_data" not in st.session_state
    or st.session_state["synthetic_data"]["num_members"] != num_members
)
if regenerate or is_stale:
    st.session_state["synthetic_data"] = {
        "num_members": num_members,
        "frames": generate_synthetic_data(num_members),
    }
enrollments_df, members_df, providers_df, services_df = st.session_state["synthetic_data"]["frames"]

# (display name, dataframe) pairs; the name is reused in the preview caption,
# the download label and the CSV file name.
tables = (
    ("Enrollments", enrollments_df),
    ("Members", members_df),
    ("Providers", providers_df),
    ("Services", services_df),
)

# Display dataframes
st.subheader("Preview of Generated Data")
for table_name, frame in tables:
    st.write(f"{table_name} Data")
    st.dataframe(frame.head())

# Allow downloading the generated files
st.subheader("Download Synthetic Data")
for table_name, frame in tables:
    st.download_button(
        label=f"Download {table_name} Data",
        data=frame.to_csv(index=False),
        file_name=f"Synthetic_{table_name}.csv",
        mime="text/csv",
    )