| | import streamlit as st |
| | import numpy as np |
| | import pickle |
| | import pandas as pd |
| | import matplotlib.pyplot as plt |
| | import seaborn as sns |
| |
|
| | |
# Deserialize the trained regressor used by the "Hands-on Model" page.
# The context manager closes the file handle deterministically instead of
# leaving an open handle behind for the garbage collector.
# NOTE(review): pickle executes arbitrary code while loading; only ever point
# this at the model artifact shipped with the app.
with open("life_expectancy_model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
| |
|
| | |
# Browser-tab title/icon and the centered layout used by the whole app.
_page_settings = {
    "page_title": "Life Expectancy Prediction",
    "page_icon": "π",
    "layout": "centered",
}
st.set_page_config(**_page_settings)
| |
|
| | |
# App-wide CSS: page background, the .title/.subtitle heading classes, the
# full-width button styling and the .result-box used for the prediction.
_app_stylesheet = """
<style>
.stApp {
background-color: #E3F2FD;
}
.title {
text-align: center;
font-size: 28px;
font-weight: bold;
color: #2C3E50;
}
.subtitle {
text-align: center;
font-size: 30px;
font-weight: bold;
color: #003366;
margin-top: 10px;
}
.stButton > button {
width: 100%;
background-color: #1E88E5;
color: white;
font-size: 16px;
font-weight: bold;
border-radius: 6px;
padding: 8px;
transition: 0.3s;
}
.stButton > button:hover {
background-color: #1565C0;
}
.result-box {
text-align: center;
font-size: 22px;
font-weight: bold;
color: white;
padding: 15px;
border-radius: 8px;
margin-top: 20px;
background-color: #388E3C;
}
</style>
"""
st.markdown(_app_stylesheet, unsafe_allow_html=True)
| |
|
| | |
# First run of a session: start on the "Model Pipeline" landing page.
if "current_page" not in st.session_state:
    st.session_state["current_page"] = "Model Pipeline"
| |
|
def switch_page(page):
    """Make ``page`` the active page and rerun the script so it shows at once.

    The page body is chosen by an if/elif chain on
    ``st.session_state.current_page``.  Buttons rendered inside a page call
    this function after that chain has already picked the old page, so only
    storing the new value would leave the old page on screen until the next
    interaction.  Triggering a rerun makes the switch visible immediately.

    Args:
        page: Name of the page to activate; must match one of the strings
            compared against ``st.session_state.current_page``.
    """
    st.session_state.current_page = page
    # Stops the current run and starts a fresh one with the new page selected.
    st.rerun()
| |
|
| | |
# Sidebar with one button per top-level page; the button label doubles as
# the page name stored in session state.
st.sidebar.title("Navigation")
for _sidebar_page in ("Model Pipeline", "Hands-on Model"):
    if st.sidebar.button(_sidebar_page):
        switch_page(_sidebar_page)
| |
|
| | |
@st.cache_data
def _load_dataset(csv_path="Life Expectancy Data.csv"):
    """Read the dataset CSV and strip whitespace from its column names.

    Streamlit reruns the whole script on every widget interaction; caching
    keeps the CSV from being re-parsed each time.  The cache hands back a
    copy of the stored frame on every call.

    Args:
        csv_path: Path of the CSV file to read.

    Returns:
        The loaded DataFrame with stripped column names.
    """
    frame = pd.read_csv(csv_path)
    frame.columns = frame.columns.str.strip()
    return frame


# Dataset shown on the "Simple EDA" page.
data = _load_dataset()
| |
|
| | |
if st.session_state.current_page == "Model Pipeline":
    # Landing page: overview image, one button per pipeline stage, then the
    # repository link and the author card.
    divider_html = "<hr style='border:1px solid #ddd;'>"

    st.markdown("<h1 class='title'>Model Pipeline</h1>", unsafe_allow_html=True)
    st.markdown(divider_html, unsafe_allow_html=True)

    st.image(
        "images/Life_Expectancy.webp",
        caption="Life Expectancy Prediction Overview",
        use_container_width=True,
    )

    # (button label, page name stored in session state), in display order.
    pipeline_steps = (
        ("**Problem Statement**", "Problem Statement"),
        ("**Data Collection**", "Data Collection"),
        ("**Simple EDA**", "Simple EDA"),
        ("**Data Pre-processing**", "Data Pre-processing"),
        ("**Exploratory Data Analysis**", "EDA"),
        ("**Model Building**", "Model Building"),
        ("**Final Model**", "Final Model"),
    )
    for step_label, step_page in pipeline_steps:
        if st.button(step_label):
            switch_page(step_page)

    st.markdown(divider_html, unsafe_allow_html=True)

    # Link button to the project repository.
    repo_link_html = """
    <div style="text-align: center;">
    <a href="https://github.com/Yashvj22/Life_Expectancy_Model" target="_blank" style="
    background-color: #007bff;
    color: white;
    padding: 12px 25px;
    text-decoration: none;
    font-size: 16px;
    font-weight: bold;
    border-radius: 8px;
    display: inline-block;
    transition: 0.3s;">
    π See Whole Code on GitHub
    </a>
    </div>
    """
    st.markdown(repo_link_html, unsafe_allow_html=True)

    st.markdown(divider_html, unsafe_allow_html=True)

    # Author card with profile links.
    author_card_html = '''
    <h2 style="text-align:center;"> About Author</h2>
    <div style="background-color:#f5f5f5; border-radius:10px; padding:20px; margin-top:20px;">
    <p style="font-size:16px; text-align:center; font-family:Georgia; line-height:1.6; color:#000;">
    Hello! Iβm <b>Yash Jadhav</b>, a passionate <span style="color:#FF6347;">Data Scientist</span>
    and <span style="color:#4682B4;">Data Analyst</span>.
    I specialize in transforming raw data into actionable insights and helping others master the art of Machine Learning.
    </p>
    <div style="text-align:center; margin-top:20px;">
    <a href="https://www.linkedin.com/in/yash-jadhav-454b0a237/" target="_blank" style="
    background-color:#0073b1; color:white; padding:10px 20px; border-radius:5px;
    text-decoration:none; margin-right:10px;">LinkedIn</a>
    <a href="https://github.com/Yashvj22" target="_blank" style="
    background-color:black; color:white; padding:10px 20px; border-radius:5px;
    text-decoration:none; margin-right:10px;">GitHub</a>
    <a href="https://medium.com/@yashvj2222" target="_blank" style="
    background-color:grey; color:white; padding:10px 20px; border-radius:5px;
    text-decoration:none;">Medium</a>
    </div>
    </div>
    '''
    st.markdown(author_card_html, unsafe_allow_html=True)
| |
|
| |
|
| | |
| | elif st.session_state.current_page == "Problem Statement": |
| | st.markdown("<h1 class='title'>Problem Statement</h1>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h5 style="text-align: center; margin-top: 20px;"> |
| | The Goal of this project is to build a predictive model that estimates the Life Expectancy of a country |
| | based on multiple influencing factors such as health indicators, economic conditions, and social parameters. |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | st.image("images/problem_statement.png", |
| | caption="Life Expectancy Prediction Overview", |
| | use_container_width=True) |
| |
|
| |
|
| | if st.button("π Go Back to Model Pipeline"): |
| | switch_page("Model Pipeline") |
| |
|
| | elif st.session_state.current_page == "Data Collection": |
| | st.markdown("<h1 class='title'>Data Collection</h1>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h5 style="text-align: center; margin-top: 20px;"> |
| | The dataset used in this project is sourced from Kaggle, containing information on life expectancy across |
| | different countries along with various health, economic, and demographic factors. |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h5 style="text-align: center; margin-top: 10px;"> |
| | π <a href="https://www.kaggle.com/datasets/kumarajarshi/life-expectancy-who" target="_blank" style="font-weight: bold; color: #007BFF; text-decoration: none;"> |
| | Click here to access the dataset on Kaggle</a> |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| |
|
| | st.markdown("<h2 class='subtitle' style='text-align: center; margin-top: 20px;'>Dataset Overview</h2>", unsafe_allow_html=True) |
| | |
| | st.markdown(""" |
| | <h5 style="text-align: center; margin-top: 15px; margin-bottom: 20px;"> |
| | The dataset consists of <b>2938 rows</b> and <b>22 columns</b>, capturing crucial indicators such as life expectancy, |
| | mortality rates, GDP, schooling, immunization rates, and more. Below is a summary of the dataset features: |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | data_info = """ |
| | <div style= "font-size: 16px; background-color: #F5F5F5; padding: 15px; border-radius: 10px;"> |
| | β’ <b>Country:</b> Name of the country (Categorical)<br> |
| | β’ <b>Year:</b> Year of observation (Numerical)<br> |
| | β’ <b>Status:</b> Developing or Developed country (Categorical)<br> |
| | β’ <b>Life Expectancy:</b> Average age a person is expected to live (Numerical)<br> |
| | β’ <b>Adult Mortality:</b> Probability of dying between 15-60 years per 1000 population (Numerical)<br> |
| | β’ <b>Infant Deaths:</b> Number of infant deaths per 1000 live births (Numerical)<br> |
| | β’ <b>Alcohol:</b> Alcohol consumption per capita (Numerical)<br> |
| | β’ <b>Percentage Expenditure:</b> Government expenditure on health as a percentage of GDP (Numerical)<br> |
| | β’ <b>Hepatitis B:</b> Immunization coverage for Hepatitis B (Numerical)<br> |
| | β’ <b>Measles:</b> Number of reported measles cases per year (Numerical)<br> |
| | β’ <b>BMI:</b> Average Body Mass Index of the population (Numerical)<br> |
| | β’ <b>Under-five Deaths:</b> Number of deaths under the age of five per 1000 live births (Numerical)<br> |
| | β’ <b>Polio:</b> Immunization coverage for Polio (Numerical)<br> |
| | β’ <b>Total Expenditure:</b> Total health expenditure as a percentage of GDP (Numerical)<br> |
| | β’ <b>Diphtheria:</b> Immunization coverage for Diphtheria (Numerical)<br> |
| | β’ <b>HIV/AIDS:</b> Death rate due to HIV/AIDS per 100,000 people (Numerical)<br> |
| | β’ <b>GDP:</b> Gross Domestic Product per capita (Numerical)<br> |
| | β’ <b>Population:</b> Total population of the country (Numerical)<br> |
| | β’ <b>Thinness 1-19 Years:</b> Percentage of thin individuals aged 1-19 years (Numerical)<br> |
| | β’ <b>Thinness 5-9 Years:</b> Percentage of thin individuals aged 5-9 years (Numerical)<br> |
| | β’ <b>Income Composition:</b> Human development index based on income composition (Numerical)<br> |
| | β’ <b>Schooling:</b> Average number of years of schooling (Numerical)<br> |
| | </div> |
| | """ |
| | |
| | st.markdown(data_info, unsafe_allow_html=True) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | if st.button("π Go Back to Model Pipeline"): |
| | switch_page("Model Pipeline") |
| |
|
| | |
| | |
| | elif st.session_state.current_page == "Simple EDA": |
| | st.markdown("<h1 class='title'>Simple Exploratory Data Analysis</h1>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h5 style="text-align: center; margin-top: 20px;"> |
| | Exploratory Data Analysis (EDA) helps in understanding the structure, patterns, and missing values in the dataset. |
| | Below is an initial preview of the data, followed by a missing values summary. |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | |
| | st.markdown("<h3 class='subtitle' style='text-align: center;'>Sample Dataset</h3>", unsafe_allow_html=True) |
| | st.dataframe(data.head()) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | |
| | st.markdown("<h3 class='subtitle' style='text-align: center;'>Missing Values Summary</h3>", unsafe_allow_html=True) |
| | |
| | missing_values = data.isna().sum().reset_index() |
| | missing_values.columns = ["Column Name", "Missing Values"] |
| | |
| | col1, col2, col3 = st.columns([1, 2, 1]) |
| |
|
| | with col2: |
| | st.dataframe(missing_values) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | |
| | st.markdown("<h3 class='subtitle' style='text-align: center;'>Data Description</h3>", unsafe_allow_html=True) |
| | |
| | st.dataframe(data.describe()) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | |
| | st.markdown("<h3 class='subtitle' style='text-align: center;'>Boxplots for Data Distribution</h3>", unsafe_allow_html=True) |
| |
|
| | |
| | columns = ['Life expectancy', 'Adult Mortality', |
| | 'infant deaths', 'Alcohol', 'percentage expenditure', 'Hepatitis B', |
| | 'Measles', 'BMI', 'under-five deaths', 'Polio', 'Total expenditure', |
| | 'Diphtheria', 'HIV/AIDS', 'GDP', 'Population', 'thinness 1-19 years', |
| | 'thinness 5-9 years', 'Income composition of resources', 'Schooling'] |
| |
|
| | |
| | fig, axes = plt.subplots(nrows=10, ncols=2, figsize=(12, 30)) |
| | axes = axes.flatten() |
| |
|
| | for i, col in enumerate(columns): |
| | sns.boxplot(x=data[col], ax=axes[i], color="skyblue") |
| | axes[i].set_title(f'Boxplot of {col}', fontsize=12) |
| | axes[i].set_xlabel("") |
| |
|
| | plt.tight_layout() |
| | st.pyplot(fig) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| | |
| |
|
| | if st.button("π Go Back to Model Pipeline"): |
| | switch_page("Model Pipeline") |
| |
|
| |
|
| | elif st.session_state.current_page == "Data Pre-processing": |
| | st.markdown("<h1 class='title'>Data Preprocessing</h1>", unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | st.markdown("<h2 class='subtitle' style='text-align: center;'>Handling Missing Values</h2>", unsafe_allow_html=True) |
| | |
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h5 style="text-align: center;"> |
| | <b>Using "Median" Imputation to Fill Highly Skewed Data</b> |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <div style=" |
| | border: 1px solid #ddd; |
| | border-radius: 8px; |
| | padding: 15px; |
| | background-color: #f9f9f9; |
| | text-align: justify;"> |
| | Median imputation is used for columns where data distribution is highly skewed. |
| | This approach ensures that extreme values do not overly influence the dataset. |
| | Examples include GDP, Population, and Adult Mortality. |
| | </div> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h5 style="text-align: center;"> |
| | <b>Mean Imputation for Columns with Small Missing Values and Normally Distributed Data</b> |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <div style=" |
| | border: 1px solid #ddd; |
| | border-radius: 8px; |
| | padding: 15px; |
| | background-color: #f9f9f9; |
| | text-align: justify;"> |
| | Mean imputation is applied when missing values are small and the data is normally distributed. |
| | This helps maintain the overall dataset structure without being affected by extreme values. |
| | Suitable columns include BMI, Polio, and Schooling. |
| | </div> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h5 style="text-align: center;"> |
| | <b>Applying One-Hot Encoding on "Status" Column</b> |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <div style=" |
| | border: 1px solid #ddd; |
| | border-radius: 8px; |
| | padding: 15px; |
| | background-color: #f9f9f9; |
| | text-align: justify;"> |
| | The "Status" column categorizes countries as either Developed or Developing. |
| | One-Hot Encoding is used to convert this categorical variable into a numerical format |
| | suitable for machine learning models. The "drop='first'" parameter is applied to prevent |
| | multicollinearity. |
| | </div> |
| | """, unsafe_allow_html=True) |
| |
|
| |
|
| | if st.button("π Go Back to Model Pipeline"): |
| | switch_page("Model Pipeline") |
| |
|
| | |
| | |
| | elif st.session_state.current_page == "EDA": |
| | st.markdown("<h1 class='title'>Exploratory Data Analysis (EDA)</h1>", unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | |
| | st.markdown("<h2 class='subtitle' style='text-align: center;'>Target Column Distribution</h2>", unsafe_allow_html=True) |
| | st.image("images/target_column_distribution.png", caption="Life Expectancy Distribution", use_container_width=True) |
| | st.markdown(""" |
| | <h5 style="text-align: center;"> |
| | Insight: Mostly Life Expectancy is in <b>range of 50-80</b>. |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | |
| | st.markdown("<h2 class='subtitle' style='text-align: center;'>Correlation Heatmap</h2>", unsafe_allow_html=True) |
| | st.image("images/Correlation_Heatmap.png", caption="Correlation Heatmap", use_container_width=True) |
| | st.markdown(""" |
| | <h5 style="text-align: center;"> |
| | Insight: Our target column <b>Life Expectancy</b> is mostly linearly dependent on |
| | <b>Schooling, Income Composition of Resources, GDP, Diphtheria, Polio, BMI, and Percentage Expenditure</b>. |
| | </h5> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | |
| | st.markdown("<h2 class='subtitle' style='text-align: center;'>How Specific Columns Affect Life Expectancy</h2>", unsafe_allow_html=True) |
| |
|
| | |
| | st.image("images/specific_col_affecting_target.png", caption="Features vs. Life Expectancy", use_container_width=True) |
| | st.markdown(""" |
| | <h5> |
| | Insights: |
| | |
| | 1οΈβ£ **GDP vs. Life Expectancy** |
| | - Positive correlation: As GDP increases, Life Expectancy also increases. |
| | - Some countries with low GDP still have high Life Expectancy due to good healthcare policies. |
| | |
| | 2οΈβ£ **Schooling vs. Life Expectancy** |
| | - Strong positive correlation: More years of schooling β longer life. |
| | - Educated populations follow better hygiene, diet, and medical care, increasing Life Expectancy. |
| | |
| | 3οΈβ£ **Income Composition vs. Life Expectancy** |
| | - Higher economic stability leads to better healthcare systems and lifestyles, improving Life Expectancy. |
| | |
| | 4οΈβ£ **Diphtheria & Polio vs. Life Expectancy** |
| | - Higher vaccination rates (80%-100%) correspond to Life Expectancy above 70 years. |
| | - Lower vaccination rates (<40%) lead to lower Life Expectancy (~40-60 years), indicating weak healthcare infrastructure. |
| | |
| | 5οΈβ£ **BMI vs. Life Expectancy** |
| | - No clear linear trend due to high variance in data points. |
| | - BMI < 18 (malnutrition) and BMI > 30 (obesity) reduce Life Expectancy. |
| | - Advanced healthcare and better nutrition in some countries help maintain high Life Expectancy despite malnutrition/obesity. |
| | </h5> |
| | """, unsafe_allow_html=True) |
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| | |
| | |
| | st.markdown("<h2 class='subtitle' style='text-align: center;'>Life Expectancy vs Developed / Undeveloped Countries</h2>", unsafe_allow_html=True) |
| | st.image("images/target_col vs countries.png", caption="Life Expectancy vs Developed / Undeveloped Countries", use_container_width=True) |
| | st.markdown(""" |
| | <h5 style="text-align: center;"> |
| | Insight: Life Expectancy is <b>higher in Developed Countries</b> due to Advanced Healthcare, Better Nutrition, Medical Interventions. |
| | </h5>""", unsafe_allow_html=True) |
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | if st.button("π Go Back to Model Pipeline"): |
| | switch_page("Model Pipeline") |
| | |
| |
|
| | |
| | elif st.session_state.current_page == "Model Building": |
| | |
| | st.markdown(""" |
| | <h2 style='text-align: center; color: #333;'>Model Building</h2> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h2>Introduction</h2> |
| | <p>In this section, we explore different <b>Ensemble Learning</b> techniques to improve model performance.</p> |
| | <p>We implemented three ensemble models: |
| | <span style='font-size:16px;'>π₯ <b>Voting Regressor</b> - π― <b>Bagging Regressor</b> - π² <b>Random Forest Regressor</b></span></p> |
| | """, unsafe_allow_html=True) |
| | |
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| | |
| | st.markdown(""" |
| | <h5 style='color: #1363DF;'>1οΈβ£ Voting Regressor</h5> |
| | <ul> |
| | <li><b>Concept:</b> Combines multiple models (<b>KNN & Decision Tree</b>) and takes the <b>average prediction</b>.</li> |
| | <li><b>Why Voting Regressor?</b> β
Works well when models have different strengths. β
Reduces variance while maintaining interpretability.</li> |
| | </ul> |
| | """, unsafe_allow_html=True) |
| | |
| | st.markdown("<hr style='border:1px dashed #bbb;'>", unsafe_allow_html=True) |
| | |
| | st.markdown(""" |
| | <h5 style='color: #FF6D28;'>2οΈβ£ Bagging Regressor</h5> |
| | <ul> |
| | <li><b>Concept:</b> Uses <b>bootstrap sampling</b> to train multiple models on different subsets of data.</li> |
| | <li><b>Why Bagging Regressor?</b> β
Reduces overfitting by averaging multiple models. β
Works best with <b>high-variance models</b> like Decision Tree.</li> |
| | </ul> |
| | """, unsafe_allow_html=True) |
| | |
| | st.markdown("<hr style='border:1px dashed #bbb;'>", unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <h5 style='color: #2EB086;'>3οΈβ£ Random Forest Regressor</h5> |
| | <ul> |
| | <li><b>Concept:</b> Uses <b>multiple Decision Trees</b>, trained on different feature subsets.</li> |
| | <li><b>Why Random Forest?</b> β
Handles <b>non-linearity</b> well. β
Less prone to overfitting compared to a single Decision Tree.</li> |
| | </ul> |
| | """, unsafe_allow_html=True) |
| | |
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| | |
| | st.markdown(""" |
| | <h3>Combining High & Low Variance Models</h3> |
| | <p>A crucial step to improve ensemble performance is <b>choosing models with different variance levels</b>:</p> |
| | <ul> |
| | <li><b>Voting Regressor:</b> Uses a combination of <b>high-variance</b> (Decision Tree, KNN with small K) and <b>low-variance</b> (KNN with large K, Decision Tree with depth constraint) models.</li> |
| | <li><b>Bagging & Random Forest:</b> Use <b>only high-variance models</b> (Decision Trees with deep splits) to maximize variance reduction.</li> |
| | </ul> |
| | <p><b>This technique helps create a <span style='color: green;'>balanced ensemble</span>, preventing excessive overfitting or underfitting! β
</b></p> |
| | """, unsafe_allow_html=True) |
| | |
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | |
| | st.markdown(""" |
| | <h3>Hyperparameter Tuning using Optuna β‘</h3> |
| | <p>We optimized hyperparameters for <b>KNN, Decision Tree, Bagging Regressor, and Random Forest</b> using <b>Optuna</b>.</p> |
| | <p>Below are the <b>optimized parameters</b> for each model:</p> |
| | |
| | <h5>πΉ K-Nearest Neighbors (KNN)</h5> |
| | <ul> |
| | <li><code>n_neighbors</code></li> |
| | <li><code>p</code></li> |
| | <li><code>weights</code></li> |
| | <li><code>algorithm</code></li> |
| | </ul> |
| | |
| | <h5>πΉ Decision Tree</h5> |
| | <ul> |
| | <li><code>max_depth</code></li> |
| | <li><code>min_samples_split</code></li> |
| | <li><code>min_samples_leaf</code></li> |
| | <li><code>max_features</code></li> |
| | <li><code>min_impurity_decrease</code></li> |
| | </ul> |
| | |
| | <h5>πΉ Bagging Regressor</h5> |
| | <ul> |
| | <li><code>n_estimators</code></li> |
| | <li><code>max_samples</code></li> |
| | </ul> |
| | |
| | <h5>πΉ Random Forest</h5> |
| | <ul> |
| | <li><code>n_estimators</code></li> |
| | <li><code>max_samples</code></li> |
| | </ul> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| | |
| | |
| | st.markdown(""" |
| | <h3>Model Performance Insights π</h3> |
| | <p>Hereβs how our ensemble models performed on training and test datasets:</p> |
| | """, unsafe_allow_html=True) |
| | |
| | st.markdown(""" |
| | <style> |
| | table { |
| | width: 100%; |
| | border-collapse: collapse; |
| | text-align: center; |
| | font-size: 16px; |
| | } |
| | th, td { |
| | padding: 10px; |
| | border-bottom: 1px solid #ddd; |
| | } |
| | th { |
| | background-color: #F3F4F6; |
| | } |
| | </style> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown(""" |
| | <table> |
| | <tr> |
| | <th>Ensemble</th> |
| | <th>Training Score</th> |
| | <th>Test Score</th> |
| | <th>Generalized Score</th> |
| | </tr> |
| | <tr> |
| | <td>Voting Ensemble</td> |
| | <td>95.80%</td> |
| | <td>92.13%</td> |
| | <td>92.89%</td> |
| | </tr> |
| | <tr> |
| | <td>Bagging Ensemble</td> |
| | <td>98.68%</td> |
| | <td>95.04%</td> |
| | <td><b>95.45%</b></td> |
| | </tr> |
| | <tr> |
| | <td>Random Forest</td> |
| | <td>97.92%</td> |
| | <td>94.71%</td> |
| | <td>94.71%</td> |
| | </tr> |
| | </table> |
| | """, unsafe_allow_html=True) |
| |
|
| | st.markdown("<br>", unsafe_allow_html=True) |
| |
|
| | if st.button("π Go Back to Model Pipeline"): |
| | switch_page("Model Pipeline") |
| |
|
| | |
| | elif st.session_state.current_page == "Final Model": |
| |
|
| | st.markdown( |
| | """ |
| | <style> |
| | .title { |
| | text-align: center; |
| | font-size: 36px; |
| | font-weight: bold; |
| | color: #1E3A8A; |
| | } |
| | .subtitle { |
| | text-align: center; |
| | font-size: 20px; |
| | color: #475569; |
| | margin-bottom: 20px; |
| | } |
| | .image-container { |
| | display: flex; |
| | justify-content: center; |
| | } |
| | .caption { |
| | text-align: center; |
| | font-size: 16px; |
| | font-style: italic; |
| | color: #6B7280; |
| | } |
| | .box { |
| | background-color: #F8FAFC; |
| | padding: 15px; |
| | border-radius: 10px; |
| | box-shadow: 2px 2px 10px rgba(0, 0, 0, 0.1); |
| | margin-bottom: 20px; |
| | } |
| | </style> |
| | """, |
| | unsafe_allow_html=True, |
| | ) |
| |
|
| | |
| | st.markdown("<h1 class='title'>Final Model</h1>", unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | st.markdown( |
| | "<div class='box'>" |
| | "<p><strong>After experimenting with multiple trials using Optuna, we selected the best-fit model " |
| | "by analyzing the training and test scores of different trials. " |
| | "The following scatter plots provide insights into this selection process.</strong></p>" |
| | "</div>", |
| | unsafe_allow_html=True, |
| | ) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | st.markdown("<h3 style='text-align: center;'>Training vs Test Score (All Trials)</h3>", unsafe_allow_html=True) |
| | st.markdown( |
| | "<p class='subtitle'>This scatter plot visualizes the training and test scores of all trials. " |
| | "The goal was to identify a model where both scores are closely aligned, ensuring minimal overfitting or underfitting.</p>", |
| | unsafe_allow_html=True, |
| | ) |
| | |
| | st.image("images/bagging_trails.png", |
| | caption="All Trails", |
| | use_container_width=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| | |
| | st.markdown("<h3 style='text-align: center;'>Training vs Test Score (First 50 Trials)</h3>", unsafe_allow_html=True) |
| | st.markdown( |
| | "<p class='subtitle'>By filtering the first 50 trials, we focused on models that demonstrated balanced performance. " |
| | "The best-fit model was selected by ensuring that the training and test scores are close to each other.</p>", |
| | unsafe_allow_html=True, |
| | ) |
| |
|
| | st.image("images/bagging_50trails.png", |
| | caption="50 Trails", |
| | use_container_width=True) |
| |
|
| | st.markdown( |
| | "<p style='text-align: center; font-weight: bold; font-size: 16px;'>" |
| | "From the above trials, we selected the <b>9th trial</b> as its train score and test score have minimal difference." |
| | "</p>", |
| | unsafe_allow_html=True |
| | ) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| | |
| | st.markdown("<h3 style='text-align: center;'>Selected Best-Fit Model</h3>", unsafe_allow_html=True) |
| |
|
| | st.markdown( |
| | "<div class='box'>" |
| | "<ul>" |
| | "<li><b>Base Model:</b> DecisionTreeRegressor</li>" |
| | "<li><b>Hyperparameters:</b>" |
| | "<ul>" |
| | "<li>min_samples_leaf = 2</li>" |
| | "<li>min_samples_split = 3</li>" |
| | "</ul></li>" |
| | "<li><b>Ensemble Method:</b> BaggingRegressor</li>" |
| | "<li><b>Bagging Hyperparameters:</b>" |
| | "<ul>" |
| | "<li>n_estimators = 40</li>" |
| | "<li>max_samples = 0.838404</li>" |
| | "</ul></li>" |
| | "</ul>" |
| | "<p>This model was selected as it demonstrated a balance between generalization and performance.</p>" |
| | "</div>", |
| | unsafe_allow_html=True, |
| | ) |
| |
|
| | |
| | if st.button("π Go Back to Model Pipeline"): |
| | switch_page("Model Pipeline") |
| | |
| |
|
| | |
| | elif st.session_state.current_page == "Hands-on Model": |
| | st.markdown("<h1 class='title'>Hands-on Model</h1>", unsafe_allow_html=True) |
| |
|
| | st.markdown("<hr style='border:1px solid #ddd;'>", unsafe_allow_html=True) |
| |
|
| | st.markdown("<h4 class='subtitle' style='text-align: center;'>Provide inputs to predict Life Expectancy</h4>", unsafe_allow_html=True) |
| | |
| |
|
| | col1, col2 = st.columns(2) |
| | with col1: |
| | year = st.slider("Year", 2000, 2015, 2008) |
| | status = st.radio("Status", ["Developing", "Developed"], horizontal=True) |
| | status = 1 if status == "Developed" else 0 |
| | adult_mortality = st.slider("Adult Mortality Rate", 1, 723, 144) |
| | infant_deaths = st.slider("Infant Deaths", 0, 1800, 3) |
| | alcohol = st.slider("Alcohol Consumption", 0.01, 17.87, 4.55) |
| | percentage_expenditure = st.slider("Percentage Expenditure", 0.0, 19479.91, 738.25) |
| | hepatitis_b = st.slider("Hepatitis B Immunization (%)", 1, 99, 83) |
| | measles = st.slider("Measles Cases", 0, 212183, 2419) |
| | bmi = st.slider("BMI", 1.0, 87.3, 38.3) |
| | polio = st.slider("Polio Immunization (%)", 3, 99, 82) |
| |
|
| | with col2: |
| | under_five_deaths = st.slider("Under-Five Deaths", 0, 2500, 4) |
| | total_expenditure = st.slider("Total Healthcare Expenditure (%)", 0.37, 17.6, 5.92) |
| | diphtheria = st.slider("Diphtheria Immunization (%)", 2, 99, 82) |
| | hiv_aids = st.slider("HIV/AIDS Prevalence Rate", 0.1, 50.6, 1.74) |
| | gdp = st.slider("GDP per Capita", 1.68, 119172.7, 6611.52) |
| | population = st.slider("Population", 34, 1293859000, 10230850) |
| | thinness_1_19 = st.slider("Thinness 1-19 years (%)", 0.1, 27.7, 4.83) |
| | thinness_5_9 = st.slider("Thinness 5-9 years (%)", 0.1, 28.6, 4.86) |
| | income_composition = st.slider("Income Composition of Resources", 0.0, 0.948, 0.63) |
| | schooling = st.slider("Schooling (Years)", 0.0, 20.7, 11.99) |
| |
|
| | if st.button("Predict Life Expectancy"): |
| | features = np.array([[year, status, adult_mortality, infant_deaths, alcohol, percentage_expenditure, |
| | hepatitis_b, measles, bmi, under_five_deaths, polio, total_expenditure, |
| | diphtheria, hiv_aids, gdp, population, thinness_1_19, thinness_5_9, |
| | income_composition, schooling]]) |
| | |
| | prediction = model.predict(features)[0] |
| |
|
| | st.markdown( |
| | f""" |
| | <div class="result-box"> |
| | Predicted Life Expectancy: <b>{prediction:.2f} years</b> |
| | </div> |
| | """, |
| | unsafe_allow_html=True, |
| | ) |
| |
|