csol208-demo / app /data.py
cboettig's picture
Initial commit: Streamlit CO2 explorer
3aa858e
"""Data loading and transformation helpers for the Streamlit CO₂ explorer."""
from __future__ import annotations
from functools import lru_cache
from pathlib import Path
from typing import Iterable
import pandas as pd
# Absolute path of the bundled emissions CSV. ``parents[1]`` is the parent of
# the directory that contains this module, so the file is looked up in a
# ``data`` directory that sits next to this module's own directory.
DATA_PATH = Path(__file__).resolve().parents[1].joinpath(
    "data", "co2_emissions_by_sector.csv"
)
@lru_cache(maxsize=1)
def load_emissions_data() -> pd.DataFrame:
    """Load the tidy emissions CSV found at ``DATA_PATH``.

    The call is memoized with ``lru_cache``: the file is parsed on the first
    call only, and every later call returns that same DataFrame object.
    Callers that want to modify the data should work on a copy.

    Returns
    -------
    pandas.DataFrame
        Columns: iso_code, country, year, population, gdp, co2, co2_mt,
        sector, share_of_total. The ``year`` column is cast to ``int``.
    """
    return pd.read_csv(DATA_PATH).astype({"year": int})
def available_countries() -> list[str]:
    """Return the distinct values of the ``country`` column in ascending order."""
    names = list(load_emissions_data()["country"].unique())
    names.sort()
    return names
def available_sectors() -> list[str]:
    """Return the distinct values of the ``sector`` column in ascending order."""
    labels = list(load_emissions_data()["sector"].unique())
    labels.sort()
    return labels
def _as_selection(values: Iterable[str] | None) -> list[str]:
    """Materialize an optional selection of labels as a list (``None`` -> ``[]``)."""
    if values is None:
        return []
    if isinstance(values, str):
        # A bare string is a single label, not an iterable of characters.
        return [values]
    return list(values)


def filter_data(
    countries: Iterable[str] | None = None,
    sectors: Iterable[str] | None = None,
    year_range: tuple[int, int] | None = None,
) -> pd.DataFrame:
    """Return the rows of the emissions dataset that match the given filters.

    Parameters
    ----------
    countries
        Values of the ``country`` column to keep. ``None`` or an empty
        selection keeps every country. Any iterable is accepted (list, tuple,
        set, generator, NumPy array, pandas Index/Series); a single string is
        treated as one country.
    sectors
        Values of the ``sector`` column to keep, with the same conventions as
        ``countries``.
    year_range
        ``(start, end)`` bounds on the ``year`` column, both inclusive.
        ``None`` (or an empty tuple) keeps every year.

    Returns
    -------
    pandas.DataFrame
        A new DataFrame holding the matching rows. It is independent of the
        cached dataset, so callers may modify it freely.
    """
    data = load_emissions_data()
    # Combine all conditions into one mask so only the selected rows are
    # copied, rather than duplicating the full dataset before filtering.
    keep = pd.Series(True, index=data.index, dtype=bool)

    # Materialize selections before testing them: truthiness of a NumPy array
    # or pandas object is ambiguous, and a generator is always truthy.
    wanted_countries = _as_selection(countries)
    if wanted_countries:
        keep &= data["country"].isin(wanted_countries)

    wanted_sectors = _as_selection(sectors)
    if wanted_sectors:
        keep &= data["sector"].isin(wanted_sectors)

    if year_range:
        start, end = year_range
        keep &= data["year"].between(start, end)

    # Explicit copy: the result must never share state with the cached frame.
    return data[keep].copy()