# app.py
# (Web-page header residue converted to comments so the file parses:
#  author "rasmodev", commit message "Update app.py", commit 4bd969c, 9.25 kB.)
import pandas as pd
import streamlit as st
import numpy as np
import pickle
import catboost
from sklearn.impute import SimpleImputer
# Restore the pickled bundle that ships with the app. It is read as a dict
# with (at least) the keys "model" and "unique_values".
# NOTE(review): pickle executes code while loading; only use a trusted file.
# NOTE(review): presumably `import catboost` is kept so the pickled model's
# class can be resolved during loading — confirm before removing it.
with open("model_and_key_components.pkl", "rb") as bundle_file:
    components = pickle.load(bundle_file)

# Unpack the two pieces the rest of the script relies on: the fitted model
# and the per-column option lists used to populate the select boxes.
dt_model, unique_values = components["model"], components["unique_values"]
# Page banner and title.
st.image("https://i.ytimg.com/vi/WULwst0vW8g/maxresdefault.jpg")
st.title("Income Prediction App")

# Sidebar help text: one (field label, description) pair per input field,
# rendered below in this exact order as "**label**: description".
_FIELD_DESCRIPTIONS = (
    ("Age", "Enter the age of the individual (e.g., 25, 42, 57)."),
    ("Gender", "Select the gender of the individual (e.g., Male, Female)."),
    ("Education", "Choose the highest education level of the individual (e.g., Bachelors Degree, High School Graduate, Masters Degree)."),
    ("Worker Class", "Select the class of worker for the individual (e.g., Private, Government, Self-employed)."),
    ("Marital Status", "Choose the marital status of the individual (e.g., Married, Never married, Divorced)."),
    ("Race", "Select the race of the individual (e.g., White, Black, Asian-Pac-Islander)."),
    ("Hispanic Origin", "Choose the Hispanic origin of the individual (e.g., Mexican, Puerto Rican, Cuban)."),
    ("Full/Part-Time Employment", "Select the employment status as full-time or part-time (e.g., Full-time schedules, Part-time schedules)."),
    ("Wage Per Hour", "Enter the wage per hour of the individual (numeric value, e.g., 20.50)."),
    ("Weeks Worked Per Year", "Specify the number of weeks the individual worked in a year (numeric value, e.g., 45)."),
    ("Industry Code", "Choose the category code of the industry where the individual works (e.g., Category 1, Category 2)."),
    ("Major Industry Code", "Select the major industry code of the individual's work (e.g., Industry A, Industry B)."),
    ("Occupation Code", "Choose the category code of the occupation of the individual (e.g., Category X, Category Y)."),
    ("Major Occupation Code", "Select the major occupation code of the individual (e.g., Occupation 1, Occupation 2)."),
    ("Total Employed", "Specify the number of persons worked for the employer (numeric value, e.g., 3, 5)."),
    ("Household Stat", "Choose the detailed household and family status of the individual (e.g., Single, Married-civilian spouse present)."),
    ("Household Summary", "Select the detailed household summary (e.g., Child under 18 never married, Spouse of householder)."),
    ("Veteran Benefits", "Choose whether the individual receives veteran benefits (Yes or No)."),
    ("Tax Filer Status", "Select the tax filer status of the individual (e.g., Single, Joint both 65+)."),
    ("Gains", "Specify any gains the individual has (numeric value, e.g., 1500.0)."),
    ("Losses", "Specify any losses the individual has (numeric value, e.g., 300.0)."),
    ("Dividends from Stocks", "Specify any dividends from stocks for the individual (numeric value, e.g., 120.5)."),
    ("Citizenship", "Select the citizenship status of the individual (e.g., Native, Foreign Born- Not a citizen of U S)."),
    ("Year of Migration", "Enter the year of migration for the individual (numeric value, e.g., 2005)."),
    ("Country of Birth", "Choose the individual's birth country (e.g., United-States, Other)."),
    ("Importance of Record", "Enter the weight of the instance (numeric value, e.g., 0.9)."),
)

st.sidebar.header("Description of the Required Input Fields")
for _field_label, _field_help in _FIELD_DESCRIPTIONS:
    st.sidebar.markdown(f"**{_field_label}**: {_field_help}")
def _first_option(column):
    """Return the first known value for *column* from ``unique_values``."""
    return unique_values[column][0]


# Placeholder record holding one entry per model input. The key order matters:
# this dict is later wrapped in a one-row DataFrame, so insertion order becomes
# the column order. Every value is a starting default only; the widgets that
# follow overwrite each entry with what the user selects.
input_data = {
    "age": 0,
    "gender": _first_option("gender"),
    "education": _first_option("education"),
    "worker_class": _first_option("worker_class"),
    "marital_status": _first_option("marital_status"),
    "race": _first_option("race"),
    "is_hispanic": _first_option("is_hispanic"),
    "employment_commitment": _first_option("employment_commitment"),
    "employment_stat": _first_option("employment_stat"),
    "wage_per_hour": 0,
    "working_week_per_year": 0,
    "industry_code": 0,
    "industry_code_main": _first_option("industry_code_main"),
    "occupation_code": 0,
    "occupation_code_main": _first_option("occupation_code_main"),
    "total_employed": 0,
    "household_stat": _first_option("household_stat"),
    "household_summary": _first_option("household_summary"),
    "vet_benefit": 0,
    "tax_status": _first_option("tax_status"),
    "gains": 0,
    "losses": 0,
    "stocks_status": 0,
    "citizenship": _first_option("citizenship"),
    "mig_year": 0,
    "country_of_birth_own": "United-States",
    "importance_of_record": 0.0,
}
# Render the input widgets in three columns and store each widget's current
# value in `input_data` under the matching feature name. Select boxes take
# their options from `unique_values`; numeric inputs are non-negative.
col1, col2, col3 = st.columns(3)

with col1:
    input_data['age'] = st.number_input("Age", min_value=0, key='age')
    input_data['gender'] = st.selectbox("Gender", unique_values['gender'], key='gender')
    input_data['education'] = st.selectbox("Education", unique_values['education'], key='education')
    input_data['worker_class'] = st.selectbox("Class of Worker", unique_values['worker_class'], key='worker_class')
    input_data['marital_status'] = st.selectbox("Marital Status", unique_values['marital_status'], key='marital_status')
    input_data['race'] = st.selectbox("Race", unique_values['race'], key='race')
    input_data['is_hispanic'] = st.selectbox("Hispanic Origin", unique_values['is_hispanic'], key='is_hispanic')
    input_data['employment_commitment'] = st.selectbox("Full/Part-Time Employment", unique_values['employment_commitment'], key='employment_commitment')
    input_data['employment_stat'] = st.selectbox("Has Own Business Or Is Self Employed", unique_values['employment_stat'], key='employment_stat')
    # NOTE(review): an integer min_value makes this an integer-only input,
    # while the sidebar example shows 20.50 — confirm which type the model expects.
    input_data['wage_per_hour'] = st.number_input("Wage Per Hour", min_value=0, key='wage_per_hour')

with col2:
    input_data['working_week_per_year'] = st.number_input("Weeks Worked Per Year", min_value=0, key='working_week_per_year')
    input_data['industry_code'] = st.selectbox("Category Code of Industry", unique_values['industry_code'], key='industry_code')
    input_data['industry_code_main'] = st.selectbox("Major Industry Code", unique_values['industry_code_main'], key='industry_code_main')
    input_data['occupation_code'] = st.selectbox("Category Code of Occupation", unique_values['occupation_code'], key='occupation_code')
    input_data['occupation_code_main'] = st.selectbox("Major Occupation Code", unique_values['occupation_code_main'], key='occupation_code_main')
    input_data['total_employed'] = st.number_input("Number of Persons Worked for Employer", min_value=0, key='total_employed')
    input_data['household_stat'] = st.selectbox("Detailed Household and Family Status", unique_values['household_stat'], key='household_stat')
    input_data['household_summary'] = st.selectbox("Detailed Household Summary", unique_values['household_summary'], key='household_summary')
    input_data['vet_benefit'] = st.selectbox("Veteran Benefits", unique_values['vet_benefit'], key='vet_benefit')

with col3:
    input_data['tax_status'] = st.selectbox("Tax Filer Status", unique_values['tax_status'], key='tax_status')
    input_data['gains'] = st.number_input("Gains", min_value=0, key='gains')
    input_data['losses'] = st.number_input("Losses", min_value=0, key='losses')
    input_data['stocks_status'] = st.number_input("Dividends from Stocks", min_value=0, key='stocks_status')
    input_data['citizenship'] = st.selectbox("Citizenship", unique_values['citizenship'], key='citizenship')
    # The widget key ('migration_year') intentionally stays as in the original,
    # even though the feature name is 'mig_year'.
    input_data['mig_year'] = st.selectbox("Migration Year", unique_values['mig_year'], key='migration_year')
    input_data['country_of_birth_own'] = st.selectbox("Country of Birth", unique_values['country_of_birth_own'], key='country_of_birth_own')
    # Float min_value so fractional weights (e.g., 0.9) can be entered; an
    # integer min_value would restrict this widget to whole numbers.
    input_data['importance_of_record'] = st.number_input("Importance of Record", min_value=0.0, key='importance_of_record')
# Run the model only on the script run in which "Predict" was clicked.
predict_clicked = st.button("Predict")
if predict_clicked:
    # One-row frame whose columns follow the key order of `input_data`.
    feature_row = pd.DataFrame([input_data])

    # Ask the model for both the predicted label and the class probabilities.
    predicted_labels = dt_model.predict(feature_row)
    class_probabilities = dt_model.predict_proba(feature_row)

    # Report the predicted class; a label equal to 1 is shown as "over $50K".
    st.subheader("Prediction")
    if predicted_labels[0] == 1:
        st.success("This individual is predicted to have an income of over $50K.")
    else:
        st.error("This individual is predicted to have an income of under $50K")

    # Report the probability stored at index 1 of the first row, which the
    # message presents as the "over $50K" class, rounded to two decimals.
    st.subheader("Prediction Probability")
    over_50k_probability = class_probabilities[0][1]
    st.write(f"The probability of the individual having an income over $50K is: {over_50k_probability:.2f}")