# Page-header residue from the hosting site's file viewer, kept as a comment:
# alperugurcan's picture / Update app.py / e55f408 verified
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
# Load and preprocess the dataset
@st.cache_data
def load_data(csv_path='Human Development Index - Full.csv'):
    """Read the HDI CSV and derive the frames used by the charts.

    Args:
        csv_path: Path of the CSV file to read. Defaults to the file name
            the app has always used, so ``load_data()`` keeps working.

    Returns:
        A 4-tuple ``(df_hdi_sorted, df_years, df_rank, df_hdi_groups)``:

        * ``df_hdi_sorted`` -- 'Country', 'Human Development Groups',
          'HDI Rank (2021)' and the yearly HDI columns from 2010 onward,
          sorted by 'Human Development Index (2021)' descending.
        * ``df_years`` -- ``df_hdi_sorted`` without the rank and group
          columns ('Country' plus the yearly HDI columns).
        * ``df_rank`` -- 'Country' and 'HDI Rank (2021)'.
        * ``df_hdi_groups`` -- 'Country' and 'Human Development Groups'.
    """
    df = pd.read_csv(csv_path)

    base_columns = ['ISO3', 'Country', 'Human Development Groups', 'HDI Rank (2021)']

    # Keep yearly HDI columns from 2010 onward. The year is read from the
    # four characters before the closing parenthesis; columns that match the
    # prefix but carry no numeric year are skipped instead of raising.
    hdi_columns = [
        col for col in df.columns
        if col.startswith('Human Development Index')
        and col[-5:-1].isdigit()
        and int(col[-5:-1]) >= 2010
    ]

    # Rows with any missing value are dropped while ISO3 is still present,
    # then ISO3 is removed. The non-inplace dropna avoids mutating a frame
    # derived from a column slice of ``df``.
    df_hdi = df[base_columns + hdi_columns].dropna().drop(columns=['ISO3'])

    # Sort by the 2021 index and overwrite the rank column with the
    # 1-based position among the rows that survived the dropna.
    df_hdi_sorted = df_hdi.sort_values('Human Development Index (2021)', ascending=False)
    df_hdi_sorted['HDI Rank (2021)'] = range(1, len(df_hdi_sorted) + 1)

    # Narrower views used by individual charts
    df_years = df_hdi_sorted.drop(columns=['HDI Rank (2021)', 'Human Development Groups'])
    df_rank = df_hdi_sorted[['Country', 'HDI Rank (2021)']]
    df_hdi_groups = df_hdi_sorted[['Country', 'Human Development Groups']]

    return df_hdi_sorted, df_years, df_rank, df_hdi_groups
# Fetch the prepared frames: full table, yearly values, ranks, group labels
df_hdi_sorted, df_years, df_rank, df_hdi_groups = load_data()

# Chart names offered in the sidebar; the selected one drives the chain below
chart_options = [
    'Top 10 Countries',
    'HDI Groups Distribution',
    'HDI Trends',
    'Bottom 10 Countries',
    'HDI Improvement',
    'HDI Distribution',
    'World Map',
    'HDI Comparison',
    'HDI by Development Groups',
    'HDI Sunburst',
]

# Page heading
st.title('Human Development Index Analysis')

# Sidebar with the chart selector
st.sidebar.header('Visualization Options')
chart_type = st.sidebar.selectbox('Select Chart Type', chart_options)
# Main content: build and render the chart picked in the sidebar.
# Each branch sets a subheader, builds one Plotly figure and displays it.
if chart_type == 'Top 10 Countries':
    st.subheader('Top 10 Countries by HDI (2021)')
    # df_years is already sorted by the 2021 index, highest first
    fig = px.bar(df_years.head(10), x='Country', y='Human Development Index (2021)',
                 title='Top 10 Countries by HDI (2021)',
                 color='Human Development Index (2021)', color_continuous_scale='Viridis')
    st.plotly_chart(fig)

elif chart_type == 'HDI Groups Distribution':
    st.subheader('Distribution of Countries by HDI Groups')
    fig = px.pie(df_hdi_groups, names='Human Development Groups',
                 title='Distribution of Countries by HDI Groups')
    st.plotly_chart(fig)

elif chart_type == 'HDI Trends':
    st.subheader('HDI Trends for Top 5 Countries')
    top_5 = df_rank.head()['Country'].tolist()
    # Reshape wide (one column per year) to long (one row per country-year)
    # so the line chart has the year on x and the HDI value on y.
    year_columns = [col for col in df_years.columns
                    if col.startswith('Human Development Index (')]
    trends = df_years.loc[df_years['Country'].isin(top_5), ['Country'] + year_columns].melt(
        id_vars='Country', var_name='Year', value_name='HDI')
    trends['Year'] = trends['Year'].str.extract(r'\((\d{4})\)', expand=False).astype(int)
    # Stable sort: years ascend within each line, countries keep rank order
    trends = trends.sort_values('Year', kind='mergesort')
    fig = px.line(trends, x='Year', y='HDI', color='Country',
                  title='HDI Trends for Top 5 Countries')
    st.plotly_chart(fig)

elif chart_type == 'Bottom 10 Countries':
    st.subheader('Bottom 10 Countries by HDI (2021)')
    fig = px.bar(df_years.tail(10), x='Country', y='Human Development Index (2021)',
                 title='Bottom 10 Countries by HDI (2021)')
    st.plotly_chart(fig)

elif chart_type == 'HDI Improvement':
    st.subheader('Top 10 Countries with Highest HDI Improvement (2010-2021)')
    # Work on a derived frame so the shared df_years is not modified
    improvement = df_years.assign(
        HDI_change=df_years['Human Development Index (2021)']
        - df_years['Human Development Index (2010)'])
    fig = px.bar(improvement.nlargest(10, 'HDI_change'), x='Country', y='HDI_change',
                 title='Top 10 Countries with Highest HDI Improvement (2010-2021)')
    st.plotly_chart(fig)

elif chart_type == 'HDI Distribution':
    st.subheader('Distribution of HDI Values (2021)')
    fig = px.box(df_years, y='Human Development Index (2021)',
                 title='Distribution of HDI Values (2021)')
    st.plotly_chart(fig)

elif chart_type == 'World Map':
    st.subheader('World Map of Human Development Index (2021)')
    # Countries are matched to map regions by name
    fig = px.choropleth(df_years, locations='Country', locationmode='country names',
                        color='Human Development Index (2021)',
                        title='World Map of Human Development Index (2021)',
                        color_continuous_scale='Viridis')
    st.plotly_chart(fig)

elif chart_type == 'HDI Comparison':
    st.subheader('HDI Comparison: 2010 vs 2021')
    fig = px.scatter(df_years, x='Human Development Index (2010)', y='Human Development Index (2021)',
                     hover_name='Country', title='HDI Comparison: 2010 vs 2021')
    # Diagonal y = x: points above it have a higher 2021 value than 2010
    fig.add_trace(go.Scatter(x=[0, 1], y=[0, 1], mode='lines', name='No Change Line'))
    st.plotly_chart(fig)

elif chart_type == 'HDI by Development Groups':
    st.subheader('HDI Distribution by Development Groups (2021)')
    fig = px.box(df_hdi_sorted, x='Human Development Groups', y='Human Development Index (2021)',
                 title='HDI Distribution by Development Groups (2021)')
    st.plotly_chart(fig)

elif chart_type == 'HDI Sunburst':
    st.subheader('HDI Distribution by Groups and Top Countries (2021)')
    df_sunburst = df_hdi_sorted.copy()
    # Bucket the 2021 index into four labelled ranges (right-inclusive bins).
    # NOTE(review): cast to plain strings so the path column is an ordinary
    # label column rather than a Categorical -- confirm whether the installed
    # plotly version needs this.
    df_sunburst['HDI_2021'] = pd.cut(df_sunburst['Human Development Index (2021)'],
                                     bins=[0, 0.55, 0.7, 0.8, 1],
                                     labels=['Low', 'Medium', 'High', 'Very High']).astype(str)
    fig = px.sunburst(df_sunburst,
                      path=['HDI_2021', 'Human Development Groups', 'Country'],
                      values='Human Development Index (2021)',
                      color='HDI_2021',
                      color_discrete_map={'Low': 'red', 'Medium': 'orange',
                                          'High': 'lightgreen', 'Very High': 'darkgreen'},
                      title='HDI Distribution by Groups and Top Countries (2021)')
    fig.update_traces(textinfo="label+percent entry")
    st.plotly_chart(fig)
# Closing note rendered after the chart, regardless of which one is selected
about_text = """
This app provides various visualizations of the Human Development Index (HDI) data.
Use the sidebar to select different chart types and explore the data.
Data source: [Human Development Index Dataset](https://www.kaggle.com/datasets/iamsouravbanerjee/human-development-index-dataset)
"""
st.markdown(about_text)