eaglelandsonce committed on
Commit
893ed02
1 Parent(s): 46b0664

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -0
app.py ADDED
@@ -0,0 +1,107 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+
5
+ # Function to generate synthetic data
6
def generate_synthetic_data(num_members, seed=None):
    """Generate four synthetic medical-billing tables with ``num_members`` rows each.

    Args:
        num_members: Number of rows (members / providers / service records)
            to generate. ``0`` yields four empty frames.
        seed: Optional integer seed. When given, a private
            ``np.random.RandomState(seed)`` is used so the output is
            reproducible. When ``None`` (the default) the module-level
            ``np.random`` functions are used.

    Returns:
        A tuple ``(enrollments_df, members_df, providers_df, services_df)``
        of pandas DataFrames. ``MEMBER_ID`` / ``PRIMARY_PERSON_KEY`` are shared
        by the enrollments, members and services tables; ``MEM_MSA_NAME`` is
        shared by enrollments and members; ``RELATION`` is shared by
        enrollments and services.
    """
    # RandomState exposes the same randint/choice/uniform API as the
    # np.random module, so either object can serve as the source.
    rng = np.random if seed is None else np.random.RandomState(seed)

    # Each metropolitan area belongs to exactly one state; deriving the state
    # from the MSA keeps the members table internally consistent.
    msa_to_state = {"DETROIT": "MI", "HONOLULU": "HI", "LOS ANGELES": "CA"}

    unique_ids = [f"MEM_{i:05d}" for i in range(1, num_members + 1)]
    primary_keys = [f"PPK_{i:05d}" for i in range(1, num_members + 1)]

    # Synthetic Enrollments
    enrollments_data = {
        # randint's upper bound is exclusive: ages are 18..79.
        "MEM_AGE": rng.randint(18, 80, num_members),
        "MEM_MSA_NAME": rng.choice(list(msa_to_state), num_members),
        "MEM_STAT": rng.choice(["ACTIVE", "INACTIVE"], num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        "PAYER_LOB": rng.choice(["MEDICAID", "COMMERCIAL", "MEDICARE"], num_members),
        "PAYER_TYPE": rng.choice(["PPO", "HMO"], num_members),
        "PRIMARY_CHRONIC_CONDITION_ROLLUP_DESC": rng.choice(
            ["Cancer", "Diabetes", "Hypertension"], num_members
        ),
        "PROD_TYPE": rng.choice(["DENTAL", "VISION", "MEDICAL"], num_members),
        "RELATION": rng.choice(["SUBSCRIBER", "DEPENDENT"], num_members),
    }
    enrollments_df = pd.DataFrame(enrollments_data)

    # Synthetic Members
    msa_names = enrollments_data["MEM_MSA_NAME"]
    members_data = {
        "MEM_ETHNICITY": rng.choice(["Hispanic", "Non-Hispanic", None], num_members),
        "MEM_GENDER": rng.choice(["M", "F"], num_members),
        "MEM_MSA_NAME": msa_names,
        "MEM_STATE": [msa_to_state[name] for name in msa_names],
        # Upper bound is exclusive, so 1000 is needed to allow 999.
        "MEM_ZIP3": rng.randint(100, 1000, num_members),
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
    }
    members_df = pd.DataFrame(members_data)

    # Synthetic Providers
    providers_data = {
        "PROV_CLINIC_STATE": rng.choice(["MI", "HI", "CA"], num_members),
        # Upper bound is exclusive, so 100000 is needed to allow 99999.
        "PROV_CLINIC_ZIP": rng.randint(10000, 100000, num_members),
        "PROV_KEY": [f"PK_{i:05d}" for i in range(1, num_members + 1)],
        "PROV_TAXONOMY": rng.choice(["208100000X", "207RE0101X"], num_members),
        "PROV_TYPE": rng.choice(["Type1", "Type2"], num_members),
    }
    providers_df = pd.DataFrame(providers_data)

    # Synthetic Services
    allowed = rng.uniform(1000, 10000, num_members)
    billed = rng.uniform(1000, 15000, num_members)
    services_data = {
        "MEMBER_ID": unique_ids,
        "PRIMARY_PERSON_KEY": primary_keys,
        # Cap the allowed amount at the billed amount so a record never
        # allows more than was billed.
        "Sum of AMT_ALLOWED": np.minimum(allowed, billed),
        "Sum of AMT_BILLED": billed,
        "RELATION": enrollments_data["RELATION"],
        "SERVICE_SETTING": rng.choice(["OUTPATIENT", "INPATIENT"], num_members),
    }
    services_df = pd.DataFrame(services_data)

    return enrollments_df, members_df, providers_df, services_df
59
+
60
+
61
+ # Streamlit App
62
st.title("Synthetic Medical Billing Data Generator")

# Slider for number of members
num_members = st.slider("Select number of unique members:", min_value=10, max_value=1000, step=10, value=100)

# Streamlit reruns this whole script on every widget interaction, including a
# click on a download button. Regenerating unconditionally would therefore
# give every download a different random draw, so the four CSV files would not
# match each other or the preview. Keep the generated tables in session state
# and refresh them only when the slider changes or the user asks for new data.
regenerate = st.button("Regenerate data")
if regenerate or st.session_state.get("synthetic_num_members") != num_members:
    st.session_state["synthetic_data"] = generate_synthetic_data(num_members)
    st.session_state["synthetic_num_members"] = num_members
enrollments_df, members_df, providers_df, services_df = st.session_state["synthetic_data"]

# (display name, dataframe) pairs, in the order they are shown and offered.
tables = [
    ("Enrollments", enrollments_df),
    ("Members", members_df),
    ("Providers", providers_df),
    ("Services", services_df),
]

# Display dataframes
st.subheader("Preview of Generated Data")
for table_name, table_df in tables:
    st.write(f"{table_name} Data")
    st.dataframe(table_df.head())

# Allow downloading the generated files
st.subheader("Download Synthetic Data")
for table_name, table_df in tables:
    st.download_button(
        label=f"Download {table_name} Data",
        data=table_df.to_csv(index=False),
        file_name=f"Synthetic_{table_name}.csv",
        mime="text/csv",
    )
107
+