import streamlit as st
import pandas as pd
import lightgbm as lgb
import pickle

# Deserialize the fitted model once, when the script is executed.
# NOTE(review): pickle.load can execute arbitrary code contained in the file,
# so 'lgb.pkl' must only ever come from a trusted source.
with open('lgb.pkl', 'rb') as model_file:
    model = pickle.load(model_file)

# Lookup tables translating the category labels offered in the UI into the
# integer codes placed in the feature row.
# NOTE(review): presumably these are the same codes used when the model was
# trained -- confirm against the training pipeline before editing any value.
workclass_map = {
    'Private': 1,
    'State-gov': 2,
    'Federal-gov': 3,
    'Self-emp-not-inc': 4,
    'Self-emp-inc': 5,
    'Local-gov': 6,
    'Without-pay': 7,
    'Never-worked': 8,
    '?': 9,
}

education_ranks = {
    'Preschool': 1,
    '1st-4th': 2,
    '5th-6th': 3,
    '7th-8th': 4,
    '9th': 5,
    '10th': 6,
    '11th': 7,
    '12th': 8,
    'HS-grad': 9,
    'Some-college': 10,
    'Assoc-voc': 11,
    'Assoc-acdm': 12,
    'Bachelors': 13,
    'Masters': 14,
    'Prof-school': 15,
    'Doctorate': 16,
}

# Two-valued encoding: every 'Married-*' label maps to 1, all other labels to 2.
marital_map = {
    'Married-civ-spouse': 1,
    'Married-spouse-absent': 1,
    'Married-AF-spouse': 1,
    'Widowed': 2,
    'Divorced': 2,
    'Separated': 2,
    'Never-married': 2,
}

occupation_map = {
    'Exec-managerial': 1,
    'Machine-op-inspct': 2,
    'Prof-specialty': 3,
    'Other-service': 4,
    'Adm-clerical': 5,
    'Craft-repair': 6,
    'Transport-moving': 7,
    'Handlers-cleaners': 8,
    'Sales': 9,
    'Farming-fishing': 10,
    'Tech-support': 11,
    'Protective-serv': 12,
    'Armed-Forces': 13,
    'Priv-house-serv': 14,
}

relationship_map = {
    'Not-in-family': 1,
    'Unmarried': 2,
    'Own-child': 3,
    'Other-relative': 4,
    'Husband': 5,
    'Wife': 6,
}

# Target label encoding; not referenced by the code below.
income_map = {
    '<=50K': 0,
    '>50K': 1,
}


def _select_code(label, mapping):
    """Render a select box over the keys of *mapping* and return the chosen key's code."""
    choice = st.selectbox(label, list(mapping))
    return mapping[choice]


def get_user_input():
    """Render the input widgets and return the entered values as a one-row DataFrame.

    Returns:
        pandas.DataFrame: a single row (index 0) with one column per feature.
        Categorical selections are already translated to their integer codes
        through the module-level lookup tables.
    """
    # A dict literal is evaluated top to bottom, so the widgets appear on the
    # page in the order written here, which is also the column order of the
    # returned frame.
    row = {
        'age': st.number_input('Age', min_value=0, max_value=120, value=30),
        'workclass_rank': _select_code('Workclass', workclass_map),
        'fnlwgt': st.number_input('Fnlwgt', min_value=0, value=100000),
        'education_rank': _select_code('Education', education_ranks),
        'education.num': st.number_input(
            'Education Num', min_value=0, max_value=20, value=10
        ),
        'marital_status_binary': _select_code('Marital Status', marital_map),
        'occupation_rank': _select_code('Occupation', occupation_map),
        'relationship_rank': _select_code('Relationship', relationship_map),
        'capital.gain': st.number_input('Capital Gain', min_value=0, value=0),
        'capital.loss': st.number_input('Capital Loss', min_value=0, value=0),
        'hours.per.week': st.number_input(
            'Hours Per Week', min_value=0, max_value=168, value=40
        ),
    }
    return pd.DataFrame(row, index=[0])


def main():
    """Draw the page: heading, input form, echoed inputs, predicted class and probability."""
    st.title('Income Prediction App')
    st.write('This app predicts whether a person makes over $50K a year based on their demographics and work information.')

    features = get_user_input()

    st.subheader('User Input:')
    st.write(features)

    predicted = model.predict(features)
    # Column 1 of predict_proba is reported as the '>50K' probability.
    # NOTE(review): assumes the model's second class is '>50K' -- confirm.
    positive_proba = model.predict_proba(features)[:, 1]

    st.subheader('Prediction:')
    st.write(
        'The model predicts: Income > $50K'
        if predicted[0] == 1
        else 'The model predicts: Income <= $50K'
    )

    st.subheader('Prediction Probability:')
    st.write(f'Probability of making over $50K: {positive_proba[0]:.2f}')


if __name__ == '__main__':
    main()