# Temesvári Csanád
# revert back
# 174811d
import streamlit as st
import hopsworks
import joblib
import pandas as pd
import datetime
from functions import get_weather_data_weekly, data_encoder, get_aplevel, get_color
from PIL import Image
def fancy_header(text, font_size=24):
    """Render ``text`` as a centered paragraph colored ``#ff5f27``.

    Args:
        text: Content placed inside the ``<p>`` element. It is interpolated
            as-is, so any markup it contains is rendered as HTML.
        font_size: Font size, in pixels, written into the inline style.
    """
    inline_style = f"color:#ff5f27; font-size: {font_size}px;text-align:center"
    # unsafe_allow_html lets the inline-styled <p> tag be rendered as markup
    # instead of being shown as literal text.
    st.markdown(
        f'<p style="{inline_style}">{text}</p>',
        unsafe_allow_html=True,
    )
# --- Page setup and introduction --------------------------------------------
# Configure the page ahead of every other Streamlit call in the script.
st.set_page_config(layout="wide")
st.title('Air Quality Prediction Project for Vienna! 🌩')

# Banner picture; the path is resolved relative to the working directory.
st.image(Image.open('vienna.jpg'), use_column_width='auto')
st.write("-" * 36)

# Introductory text, one markdown element per paragraph.
for intro_paragraph in (
    "# This is a final project in the course ID2223 Scalable Machine Learning and Deep Learning :computer:",
    "My task was to predict the Air Quality Index (AQI) for one city (I choose Vienna) based on different weather data (pressure, snow-and cloud-coverage, temperature, etc.).",
    "For the full list of weather data, please click [here](https://visualcrossing.com/resources/documentation/weather-api/timeline-weather-api)",
):
    st.markdown(intro_paragraph)
# --- Step 1: connect to Hopsworks -------------------------------------------
fancy_header('\n Connecting to Hopsworks Feature Store...')

# The returned project handle is used further down to reach the model
# registry, so it has to stay bound to the name `project`.
project = hopsworks.login()

st.write("Successfully connected!✔️")
st.write("-" * 36)
# --- Step 2: collect the weather data ---------------------------------------
fancy_header('\n Collecting the weather data from Vienna...')

city = "vienna"
# `today` is also the first date used for the result table's column labels.
today = datetime.date.today()
weekly_data = get_weather_data_weekly(city, today)

st.write("Successfully collected!✔️")
st.write("-" * 36)
# --- Step 3: fetch and load the trained model -------------------------------
fancy_header("Loading the fitted XGBoost model...")

mr = project.get_model_registry()
# Ask the registry for the "aqi_model" entry that minimises the "rmse" metric.
# The registry entry gets its own name so that `model` only ever refers to
# the loaded estimator.
best_model_entry = mr.get_best_model("aqi_model", "rmse", "min")
model_dir = best_model_entry.download()

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; this is only safe while the registry contents are trusted.
model = joblib.load(model_dir + "/aqi_model.pkl")
st.write("Succesfully loaded!✔️")

# The divider belongs on the main page, matching the dividers that close the
# other sections; sending it to st.sidebar would open a sidebar that holds
# nothing but this line.
st.write(36 * "-")
# --- Step 4: predict and present the results --------------------------------
fancy_header("Making AQI predictions for the next 7 days")

# Encode the collected weather rows, predict, and truncate to integers.
encoded_week = data_encoder(weekly_data)
preds = model.predict(encoded_week).astype(int)

# Category labels handed to get_aplevel together with the predictions,
# which are passed as a single column via reshape(-1, 1).
air_pollution_level = [
    'Good',
    'Moderate',
    'Unhealthy for sensitive Groups',
    'Unhealthy',
    'Very Unhealthy',
    'Hazardous',
]
poll_level = get_aplevel(preds.T.reshape(-1, 1), air_pollution_level)

# Column labels: weekday name plus ISO date for today and the six days after.
next_week_datetime = [today + datetime.timedelta(days=offset) for offset in range(7)]
next_week_str = [day.strftime('%A, %Y-%m-%d') for day in next_week_datetime]

# One row for the numeric AQI values, one for their textual level.
df = pd.DataFrame(
    data=[preds, poll_level],
    index=["AQI", "Air pollution level"],
    columns=next_week_str,
)

st.write("Here they are!")
# Apply get_color to the "Air pollution level" row only, across all columns.
styled_table = df.style.apply(
    get_color,
    subset=(["Air pollution level"], slice(None)),
)
st.dataframe(styled_table)
st.button("Re-run")