Spaces:
Sleeping
Sleeping
eaglelandsonce
committed on
Commit
•
893ed02
1
Parent(s):
46b0664
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,107 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
+
|
5 |
+
# Function to generate synthetic data
|
6 |
+
def generate_synthetic_data(num_members, seed=None):
    """Build four related synthetic medical-billing tables.

    The enrollments, members and services tables share the same
    ``MEMBER_ID`` / ``PRIMARY_PERSON_KEY`` values. Every table, the
    providers table included, has exactly ``num_members`` rows.

    Args:
        num_members: Number of rows to generate for each table.
        seed: Optional seed for a private ``numpy.random.RandomState``.
            When ``None`` (the default) NumPy's global random state is
            used, so a prior ``np.random.seed(...)`` call still controls
            the output.

    Returns:
        A tuple ``(enrollments_df, members_df, providers_df, services_df)``
        of pandas DataFrames.
    """
    # The ``np.random`` module exposes the same randint/choice/uniform
    # callables as a RandomState instance, so either one can act as ``rng``.
    rng = np.random if seed is None else np.random.RandomState(seed)

    unique_ids = [f"MEM_{i:05d}" for i in range(1, num_members + 1)]
    primary_keys = [f"PPK_{i:05d}" for i in range(1, num_members + 1)]

    # Draw one location index per member and derive BOTH the MSA name and the
    # state from it, so the two columns always agree (DETROIT -> MI, ...).
    msa_names = np.array(["DETROIT", "HONOLULU", "LOS ANGELES"])
    msa_states = np.array(["MI", "HI", "CA"])
    location_idx = rng.randint(0, len(msa_names), num_members)
    member_msa = msa_names[location_idx]
    member_state = msa_states[location_idx]

    # Synthetic Enrollments
    enrollments_data = {
        # randint's upper bound is exclusive: ages fall in 18..79.
        "MEM_AGE": rng.randint(18, 80, num_members),
        "MEM_MSA_NAME": member_msa,
        "MEM_STAT": rng.choice(["ACTIVE", "INACTIVE"], num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "PAYER_LOB": rng.choice(["MEDICAID", "COMMERCIAL", "MEDICARE"], num_members),
        "PAYER_TYPE": rng.choice(["PPO", "HMO"], num_members),
        "PRIMARY_CHRONIC_CONDITION_ROLLUP_DESC": rng.choice(
            ["Cancer", "Diabetes", "Hypertension"], num_members
        ),
        "PROD_TYPE": rng.choice(["DENTAL", "VISION", "MEDICAL"], num_members),
        "RELATION": rng.choice(["SUBSCRIBER", "DEPENDENT"], num_members),
    }
    enrollments_df = pd.DataFrame(enrollments_data)

    # Synthetic Members
    members_data = {
        # ``None`` is a deliberate choice: it models a missing ethnicity.
        "MEM_ETHNICITY": rng.choice(["Hispanic", "Non-Hispanic", None], num_members),
        "MEM_GENDER": rng.choice(["M", "F"], num_members),
        "MEM_MSA_NAME": enrollments_data["MEM_MSA_NAME"],
        "MEM_STATE": member_state,
        # Upper bound is exclusive, so 1000 is needed to allow ZIP3 == 999.
        "MEM_ZIP3": rng.randint(100, 1000, num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
    }
    members_df = pd.DataFrame(members_data)

    # Synthetic Providers (one provider row per member row)
    providers_data = {
        "PROV_CLINIC_STATE": rng.choice(["MI", "HI", "CA"], num_members),
        # Upper bound is exclusive, so 100000 is needed to allow ZIP 99999.
        "PROV_CLINIC_ZIP": rng.randint(10000, 100000, num_members),
        "PROV_KEY": [f"PK_{i:05d}" for i in range(1, num_members + 1)],
        "PROV_TAXONOMY": rng.choice(["208100000X", "207RE0101X"], num_members),
        "PROV_TYPE": rng.choice(["Type1", "Type2"], num_members),
    }
    providers_df = pd.DataFrame(providers_data)

    # Synthetic Services
    services_data = {
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "Sum of AMT_ALLOWED": rng.uniform(1000, 10000, num_members),
        "Sum of AMT_BILLED": rng.uniform(1000, 15000, num_members),
        # Reuse the enrollment draw so RELATION matches across tables.
        "RELATION": enrollments_data["RELATION"],
        "SERVICE_SETTING": rng.choice(["OUTPATIENT", "INPATIENT"], num_members),
    }
    services_df = pd.DataFrame(services_data)

    return enrollments_df, members_df, providers_df, services_df
|
59 |
+
|
60 |
+
|
61 |
+
# Streamlit App
|
62 |
+
st.title("Synthetic Medical Billing Data Generator")

# Slider for number of members
num_members = st.slider(
    "Select number of unique members:",
    min_value=10,
    max_value=1000,
    step=10,
    value=100,
)

# Generate synthetic data once per slider value and keep it in session state.
# Streamlit re-executes this whole script on every widget interaction,
# including a click on a download button. Regenerating on each rerun would
# make successive downloads come from different random draws, so columns
# shared between the files would no longer agree. Moving the slider (or
# starting a new browser session) produces a fresh data set.
if (
    "synthetic_data" not in st.session_state
    or st.session_state["synthetic_data_size"] != num_members
):
    st.session_state["synthetic_data"] = generate_synthetic_data(num_members)
    st.session_state["synthetic_data_size"] = num_members

enrollments_df, members_df, providers_df, services_df = st.session_state["synthetic_data"]

# Display dataframes (first five rows of each table)
st.subheader("Preview of Generated Data")
st.write("Enrollments Data")
st.dataframe(enrollments_df.head())
st.write("Members Data")
st.dataframe(members_df.head())
st.write("Providers Data")
st.dataframe(providers_df.head())
st.write("Services Data")
st.dataframe(services_df.head())
|
80 |
+
|
81 |
+
# Allow downloading the generated files
|
82 |
+
st.subheader("Download Synthetic Data")

# One (button label, DataFrame, CSV file name) entry per generated table,
# listed in the order the buttons are rendered on the page.
download_specs = (
    ("Download Enrollments Data", enrollments_df, "Synthetic_Enrollments.csv"),
    ("Download Members Data", members_df, "Synthetic_Members.csv"),
    ("Download Providers Data", providers_df, "Synthetic_Providers.csv"),
    ("Download Services Data", services_df, "Synthetic_Services.csv"),
)

for button_label, frame, csv_name in download_specs:
    # Each table is serialized to CSV text without its index column.
    st.download_button(
        label=button_label,
        data=frame.to_csv(index=False),
        file_name=csv_name,
        mime="text/csv",
    )
|
107 |
+
|