import streamlit as st import pandas as pd import plotly.express as px import plotly.graph_objects as go ####### # Data loading ####### df = pd.read_csv("gapminder.csv") year_values = (int(df["year"].min()), int(df["year"].max())) metrics = ["lifeExp", "pop", "gdpPercap"] dimension = ["country", "continent", "year"] ####### # Helper functions ####### def get_filtered_data( continents="All", countries="All", min_year=year_values[0], max_year=year_values[1], ): if isinstance(continents, str) and continents != "All": mask_continent = df["continent"] == continents else: mask_continent = df["continent"].isin(continents) if isinstance(countries, str) and countries != "All": mask_country = df["country"] == countries else: mask_country = df["country"].isin(countries) mask_year = ((df["year"] >= min_year) & (df["year"] <= max_year)) return df[mask_continent & mask_country & mask_year] def box_plot(df, x, y): fig = px.box( df, x=x, y=y, hover_data=df[dimension + [x]], points="all", color=x) return fig def scatter_plot(df, x, y, hue): fig = px.scatter( df, x=x, y=y, color=hue, symbol=hue) return fig def line_plot(df, y_axis, label, highlighted): fig = go.Figure() if label=="continent": df = df.groupby(["continent", "year"]).agg({ "lifeExp": "mean", "pop": "sum", "gdpPercap": "mean", }).reset_index() for i in df[label].unique(): if i == highlighted: continue data = df[df[label]==i] x = data["year"] y = data[y_axis] fig.add_trace(go.Scatter(x=x, y=y, hovertext=[ f"{label}: {i}
year: {year}
{y_axis}: {value}" for year, value in zip(x,y) ], hoverinfo="text", mode='lines', line = dict(color='gray', width=1), # name=i )) data = df[df[label]==highlighted] x = data["year"] y = data[y_axis] fig.add_trace(go.Scatter(x=x, y=y, hovertext=[ f"{label}: {highlighted}
year: {year}
{y_axis}: {value}" for year, value in zip(x,y) ], hoverinfo="text", mode='lines', line = dict(color='orange', width=10), # name=highlighted )) fig.update_layout(showlegend=False) return fig ####### # Streamlit app code ####### st.title('[Gapminder] Exploratory Data Analysis') st.markdown("## Gapminder Table") selected_continents = st.multiselect("Select Continents:", df["continent"].unique(), key="table_continent") selected_countries = st.multiselect("Select Countries:", df.loc[df["continent"].isin(selected_continents), "country"].unique(), key="table_country") min_year, max_year = st.slider("Select Year:", year_values[0], year_values[1], year_values, key="table_year") st.dataframe(get_filtered_data(selected_continents, selected_countries, min_year, max_year)) st.markdown("## Gapminder Boxplot") col1, col2 = st.columns(2) with col1: x = st.selectbox("Select x Axis", dimension, 1, key="boxplot_x") with col2: y = st.selectbox("Select y Axis", metrics, key="boxplot_y") st.plotly_chart(box_plot(df, x, y)) st.markdown('## Gapminder Lineplot') col1, col2, col3 = st.columns(3) with col3: label = st.radio("Select label", ["country", "continent"], key="lineplot_label") with col1: highlighted = st.selectbox("Select value to hightlight", df[label].unique(), key="lineplot_highlighting") with col2: y = st.selectbox("Select hue", metrics, key="lineplot_y") st.plotly_chart(line_plot(df, y, label, highlighted)) st.markdown('## Gapminder Scatterplot') col1, col2, col3 = st.columns(3) with col1: x = st.selectbox("Select x Axis", metrics, key="scatterplot_x") with col2: y = st.selectbox("Select y Axis", metrics, key="scatterplot_y") with col3: hue = st.radio("Select hue", ["country", "continent"], key="scatterplot_hue") st.plotly_chart(scatter_plot(df, x, y, hue))