Spaces:
Running
Running
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
import pandas as pd
|
3 |
+
import lightgbm as lgb
|
4 |
+
import pickle
|
5 |
+
|
6 |
+
# Load the trained LightGBM model from the relative path 'lgb.pkl'.
# NOTE: pickle.load can execute arbitrary code while deserializing, so this
# file must only ever come from a trusted source.
with open('lgb.pkl', 'rb') as model_file:
    model = pickle.load(model_file)
|
9 |
+
|
10 |
+
# Define mappings
# Each mapping translates a categorical label picked in the UI into the
# integer code that is placed in the model's feature row.

# Workclass label -> integer code (stored under the 'workclass_rank' feature).
# The key order is also the order in which the options are listed in the UI.
workclass_map = {
    'Private': 1,
    'State-gov': 2,
    'Federal-gov': 3,
    'Self-emp-not-inc': 4,
    'Self-emp-inc': 5,
    'Local-gov': 6,
    'Without-pay': 7,
    'Never-worked': 8,
    '?': 9,
}
|
22 |
+
|
23 |
+
# Education label -> integer rank, 1 ('Preschool') through 16 ('Doctorate').
# Stored under the 'education_rank' feature; the key order is also the order
# in which the options are listed in the UI.
education_ranks = {
    'Preschool': 1,
    '1st-4th': 2,
    '5th-6th': 3,
    '7th-8th': 4,
    '9th': 5,
    '10th': 6,
    '11th': 7,
    '12th': 8,
    'HS-grad': 9,
    'Some-college': 10,
    'Assoc-voc': 11,
    'Assoc-acdm': 12,
    'Bachelors': 13,
    'Masters': 14,
    'Prof-school': 15,
    'Doctorate': 16,
}
|
41 |
+
|
42 |
+
# Marital-status label -> two-valued code (stored under the
# 'marital_status_binary' feature): the three 'Married-*' labels map to 1,
# every other label maps to 2.
marital_map = {
    'Married-civ-spouse': 1,
    'Married-spouse-absent': 1,
    'Married-AF-spouse': 1,
    'Widowed': 2,
    'Divorced': 2,
    'Separated': 2,
    'Never-married': 2,
}
|
51 |
+
|
52 |
+
# Occupation label -> integer code (stored under the 'occupation_rank'
# feature). The key order is also the order in which the options are listed
# in the UI.
occupation_map = {
    'Exec-managerial': 1,
    'Machine-op-inspct': 2,
    'Prof-specialty': 3,
    'Other-service': 4,
    'Adm-clerical': 5,
    'Craft-repair': 6,
    'Transport-moving': 7,
    'Handlers-cleaners': 8,
    'Sales': 9,
    'Farming-fishing': 10,
    'Tech-support': 11,
    'Protective-serv': 12,
    'Armed-Forces': 13,
    'Priv-house-serv': 14,
}
|
68 |
+
|
69 |
+
# Relationship label -> integer code (stored under the 'relationship_rank'
# feature). The key order is also the order in which the options are listed
# in the UI.
relationship_map = {
    'Not-in-family': 1,
    'Unmarried': 2,
    'Own-child': 3,
    'Other-relative': 4,
    'Husband': 5,
    'Wife': 6,
}
|
77 |
+
|
78 |
+
# Income label -> 0/1 code. Not referenced elsewhere in the visible code.
income_map = {
    '<=50K': 0,
    '>50K': 1,
}
|
82 |
+
|
83 |
+
# Define the input fields for the user to provide data
def get_user_input() -> pd.DataFrame:
    """Render the input widgets and return the selections as one feature row.

    Numeric fields are read with ``st.number_input``; categorical fields are
    read with ``st.selectbox`` (options are the keys of the module-level
    mapping dicts) and then translated to their integer codes.

    Returns:
        A single-row DataFrame (index ``0``) with the columns, in order:
        'age', 'workclass_rank', 'fnlwgt', 'education_rank', 'education.num',
        'marital_status_binary', 'occupation_rank', 'relationship_rank',
        'capital.gain', 'capital.loss', 'hours.per.week'.
    """
    age = st.number_input('Age', min_value=0, max_value=120, value=30)
    workclass = st.selectbox('Workclass', list(workclass_map.keys()))
    fnlwgt = st.number_input('Fnlwgt', min_value=0, value=100000)
    education = st.selectbox('Education', list(education_ranks.keys()))
    # NOTE(review): 'Education Num' is entered independently of 'Education';
    # nothing here keeps the two consistent with each other.
    education_num = st.number_input('Education Num', min_value=0, max_value=20, value=10)
    marital_status = st.selectbox('Marital Status', list(marital_map.keys()))
    occupation = st.selectbox('Occupation', list(occupation_map.keys()))
    relationship = st.selectbox('Relationship', list(relationship_map.keys()))
    capital_gain = st.number_input('Capital Gain', min_value=0, value=0)
    capital_loss = st.number_input('Capital Loss', min_value=0, value=0)
    hours_per_week = st.number_input('Hours Per Week', min_value=0, max_value=168, value=40)

    # Categorical selections are replaced by their integer codes here. The
    # key order below determines the DataFrame's column order.
    # NOTE(review): presumably this must match the feature names/order the
    # model was trained with -- confirm against the training code.
    user_data = {
        'age': age,
        'workclass_rank': workclass_map[workclass],
        'fnlwgt': fnlwgt,
        'education_rank': education_ranks[education],
        'education.num': education_num,
        'marital_status_binary': marital_map[marital_status],
        'occupation_rank': occupation_map[occupation],
        'relationship_rank': relationship_map[relationship],
        'capital.gain': capital_gain,
        'capital.loss': capital_loss,
        'hours.per.week': hours_per_week
    }

    # index=[0] turns the dict of scalars into a one-row frame.
    features = pd.DataFrame(user_data, index=[0])
    return features
|
113 |
+
|
114 |
+
# Main function to run the app
def main():
    """Render the page: title, input form, echoed input, and prediction.

    Collects one feature row via ``get_user_input()``, displays it, then asks
    the module-level ``model`` for both a class prediction and a probability,
    and writes both to the page.
    """
    st.title('Income Prediction App')
    st.write('This app predicts whether a person makes over $50K a year based on their demographics and work information.')

    user_input = get_user_input()

    st.subheader('User Input:')
    st.write(user_input)

    prediction = model.predict(user_input)
    # Column 1 of predict_proba is what gets reported below as the
    # probability of making over $50K.
    prediction_proba = model.predict_proba(user_input)[:, 1]

    st.subheader('Prediction:')
    # A predicted label of 1 is shown as the '> $50K' outcome; anything else
    # is shown as '<= $50K'.
    if prediction[0] == 1:
        st.write('The model predicts: Income > $50K')
    else:
        st.write('The model predicts: Income <= $50K')

    st.subheader('Prediction Probability:')
    st.write(f'Probability of making over $50K: {prediction_proba[0]:.2f}')
|
135 |
+
|
136 |
+
# Run the app only when this file is executed as the main script.
if __name__ == '__main__':
    main()
|