{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connection closed.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "\n",
      "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160344\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (3.28s) \n",
      "Finished: Reading data from Hopsworks, using Hopsworks Feature Query Service (1.04s) \n"
     ]
    }
   ],
   "source": [
    "import streamlit as st\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "import datetime\n",
    "import hopsworks\n",
    "from functions import figure, retrieve\n",
    "import os\n",
    "import pickle\n",
    "import plotly.express as px\n",
    "import json\n",
    "from datetime import datetime\n",
    "import os\n",
    "\n",
    "\n",
    "# Real data\n",
    "today = datetime.today().strftime('%Y-%m-%d')\n",
    "df = retrieve.get_merged_dataframe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Connection closed.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "\n",
      "Logged in to project, explore it here https://c.app.hopsworks.ai:443/p/1160344\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "No air_quality_fv feature view found\n",
      "No air_quality feature group found\n",
      "No weather feature group found\n",
      "No aq_predictions feature group found\n",
      "No air_quality_xgboost_model model found\n",
      "Connected. Call `.close()` to terminate connection gracefully.\n",
      "Deleted secret SENSOR_LOCATION_JSON\n"
     ]
    }
   ],
   "source": [
    "import hopsworks\n",
    "import os\n",
    "\n",
    "from functions import util\n",
    "api_key = os.getenv('HOPSWORKS_API_KEY')\n",
    "project_name = os.getenv('HOPSWORKS_PROJECT')\n",
    "project = hopsworks.login(project=project_name, api_key_value=api_key)\n",
    "# NOTE(review): the recorded output of this cell shows the call below deleting a\n",
    "# project secret; presumably it removes other project resources as well - confirm,\n",
    "# and run this cell deliberately rather than as part of Run All.\n",
    "util.purge_project(project)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def _to_naive_utc(dates):\n",
    "    \"\"\"Return ``dates`` as timezone-naive ``datetime64[ns]`` values expressed in UTC.\n",
    "\n",
    "    ``dates`` is a pandas Series. Strings are parsed, naive timestamps are read as\n",
    "    UTC and tz-aware timestamps are converted to UTC, so every frame that is later\n",
    "    joined on ``date`` ends up with the same dtype.\n",
    "    \"\"\"\n",
    "    return pd.to_datetime(dates, utc=True).dt.tz_convert(None).astype('datetime64[ns]')\n",
    "\n",
    "\n",
    "def backfill_predictions_for_monitoring(weather_fg, air_quality_df, monitor_fg, model, n_latest=10):\n",
    "    \"\"\"Predict PM2.5 for the most recent weather rows and store them for monitoring.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    weather_fg : object with ``read()``\n",
    "        ``read()`` must return a DataFrame holding ``date`` plus the four weather\n",
    "        columns passed to the model.\n",
    "    air_quality_df : pandas.DataFrame\n",
    "        Must contain ``date``, ``past_air_quality``, ``pm25``, ``street`` and\n",
    "        ``country``. The frame is not modified.\n",
    "    monitor_fg : object with ``read()`` and ``insert()``\n",
    "        ``read()`` must return ``date`` and ``past_air_quality``; the predictions\n",
    "        (without ``pm25``) are written back through ``insert()``.\n",
    "    model : object with ``predict()``\n",
    "        Called once with the feature columns of the selected rows.\n",
    "    n_latest : int, default 10\n",
    "        Number of trailing rows (after sorting the weather data by date) to predict.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    pandas.DataFrame\n",
    "        The inserted rows together with the ``pm25`` column.\n",
    "    \"\"\"\n",
    "    feature_columns = [\n",
    "        'past_air_quality',\n",
    "        'temperature_2m_mean',\n",
    "        'precipitation_sum',\n",
    "        'wind_speed_10m_max',\n",
    "        'wind_direction_10m_dominant',\n",
    "    ]\n",
    "\n",
    "    weather_df = weather_fg.read().sort_values(by=['date'], ascending=True)\n",
    "    weather_df['date'] = _to_naive_utc(weather_df['date'])\n",
    "\n",
    "    # Work on a private copy so the caller's frame is never modified, and give it the\n",
    "    # same date representation as the other frames so both merges line up.\n",
    "    air_quality = air_quality_df.copy()\n",
    "    air_quality['date'] = _to_naive_utc(air_quality['date'])\n",
    "\n",
    "    monitor_lags = monitor_fg.read()[['date', 'past_air_quality']].copy()\n",
    "    monitor_lags['date'] = _to_naive_utc(monitor_lags['date'])\n",
    "\n",
    "    # Air-quality rows come first so that keep='first' prefers them when a date is\n",
    "    # present in both sources. Without de-duplication a repeated date would multiply\n",
    "    # weather rows in the left merge and shrink the window covered by tail().\n",
    "    lagged = pd.concat(\n",
    "        [air_quality[['date', 'past_air_quality']], monitor_lags],\n",
    "        ignore_index=True,\n",
    "    )\n",
    "    lagged = lagged.dropna(subset=['past_air_quality']).drop_duplicates(subset='date', keep='first')\n",
    "\n",
    "    features_df = pd.merge(weather_df, lagged, on='date', how='left')\n",
    "    # .copy() so adding the prediction column does not write into a slice of the merge result.\n",
    "    features_df = features_df.tail(n_latest).copy()\n",
    "    features_df['predicted_pm25'] = model.predict(features_df[feature_columns])\n",
    "\n",
    "    df = pd.merge(features_df, air_quality[['date', 'pm25', 'street', 'country']], on=\"date\")\n",
    "    df['days_before_forecast_day'] = 1\n",
    "    hindcast_df = df\n",
    "    # pm25 is returned to the caller but left out of the rows written to monitor_fg.\n",
    "    df = df.drop('pm25', axis=1)\n",
    "    monitor_fg.insert(df, write_options={\"wait_for_job\": True})\n",
    "    return hindcast_df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}