"""Streamlit app: predicts the Air Quality Index (AQI) for Vienna for 7 days.

The script connects to Hopsworks, fetches a week of weather data, loads a
fitted model from the model registry and renders the predictions as a
colour-coded table.
"""
import datetime

import hopsworks
import joblib
import pandas as pd
import streamlit as st
from PIL import Image

from functions import get_weather_data_weekly, data_encoder, get_aplevel, get_color


def fancy_header(text, font_size=24):
    """Render ``text`` as an HTML paragraph via ``st.markdown``.

    Args:
        text: Header text to display.
        font_size: Font size in pixels applied to the paragraph.
    """
    # NOTE(review): the original markup around {text} was lost and the
    # f-string was left unterminated across several lines (a SyntaxError).
    # This reconstruction only applies ``font_size``; confirm the intended
    # tag and any extra styling (e.g. colour) against the upstream source.
    res = f'<p style="font-size: {font_size}px;">{text}</p>'
    st.markdown(res, unsafe_allow_html=True)


# First Streamlit call executed by the script (fancy_header is only defined
# above, not called yet).
st.set_page_config(layout="wide")

st.title('Air Quality Prediction Project for Vienna! 🌩')

vienna_image = Image.open('vienna.jpg')
st.image(vienna_image, use_column_width='auto')

st.write(36 * "-")
st.markdown("# This is a final project in the course ID2223 Scalable Machine Learning and Deep Learning :computer:")
st.markdown("My task was to predict the Air Quality Index (AQI) for one city (I choose Vienna) based on different weather data (pressure, snow-and cloud-coverage, temperature, etc.).")
st.markdown("For the full list of weather data, please click [here](https://visualcrossing.com/resources/documentation/weather-api/timeline-weather-api)")

# --- Step 1: connect to Hopsworks ---------------------------------------
fancy_header('\n Connecting to Hopsworks Feature Store...')
project = hopsworks.login()
st.write("Successfully connected!✔️")
st.write(36 * "-")

# --- Step 2: fetch the weather data starting from today -------------------
fancy_header('\n Collecting the weather data from Vienna...')
today = datetime.date.today()
city = "vienna"
weekly_data = get_weather_data_weekly(city, today)
st.write("Successfully collected!✔️")
st.write(36 * "-")

# --- Step 3: load the model from the model registry -----------------------
fancy_header("Loading the fitted XGBoost model...")
mr = project.get_model_registry()
# Select the registered "aqi_model" by its "rmse" metric, direction "min".
model = mr.get_best_model("aqi_model", "rmse", "min")
model_dir = model.download()
# NOTE(review): joblib.load unpickles the file; only safe as long as the
# model registry content is trusted.
model = joblib.load(model_dir + "/aqi_model.pkl")
st.write("Succesfully loaded!✔️")
# NOTE(review): unlike the other separators this one is written to the
# sidebar — confirm that is intentional.
st.sidebar.write("-" * 36)

# --- Step 4: predict and display -----------------------------------------
fancy_header("Making AQI predictions for the next 7 days")
preds = model.predict(data_encoder(weekly_data)).astype(int)

air_pollution_level = [
    'Good',
    'Moderate',
    'Unhealthy for sensitive Groups',
    'Unhealthy',
    'Very Unhealthy',
    'Hazardous',
]
poll_level = get_aplevel(preds.T.reshape(-1, 1), air_pollution_level)

# Column labels: "<weekday name>, <YYYY-MM-DD>" for today and the next 6 days.
next_week_datetime = [today + datetime.timedelta(days=d) for d in range(7)]
next_week_str = [days.strftime('%A, %Y-%m-%d') for days in next_week_datetime]

df = pd.DataFrame(
    data=[preds, poll_level],
    index=["AQI", "Air pollution level"],
    columns=next_week_str,
)

st.write("Here they are!")
# Apply get_color only to the "Air pollution level" row, across all columns.
st.dataframe(df.style.apply(get_color, subset=(["Air pollution level"], slice(None))))

st.button("Re-run")