Spaces:

Mihkelmj
/

utrecht-pollution-prediction

Sleeping

App Files Files Community

Mihkelmj committed on Oct 23

Commit

647d992

•

1 Parent(s): 5064f83

connected real data to everything displayed; modified the layout a bit; added better graphs and expanders

Browse files

Files changed (10) hide show

__pycache__/data_api_calls.cpython-312.pyc +0 -0
app.py +76 -57
daily_api__pollution.py +0 -0
data_api_calls.py +9 -12
dataset.csv +8 -8
linear_regression_model.pkl +0 -3
src/daily_api__pollution.py +0 -161
src/helper_functions.py +16 -14
test.ipynb +0 -158
test.py +0 -7

__pycache__/data_api_calls.cpython-312.pyc CHANGED Viewed

Binary files a/__pycache__/data_api_calls.cpython-312.pyc and b/__pycache__/data_api_calls.cpython-312.pyc differ

app.py CHANGED Viewed

@@ -9,7 +9,7 @@ from src.models_loading import run_model
 st.set_page_config(
     page_title="Utrecht Pollution Dashboard",
-    page_icon="🏂��🌱",
     layout="wide",
     initial_sidebar_state="expanded",
 )
@@ -18,46 +18,13 @@ alt.themes.enable("dark")
 get_data()
 dataset = pd.read_csv("dataset.csv")
 prediction = run_model("O3", data=dataset)
 pred1 = prediction[0][0]
 pred2 = prediction[0][1]
 pred3 = prediction[0][2]
-# App Title
-st.title("Utrecht Pollution Dashboard🌱")
-col1, col2 = st.columns((1, 1))
-# Create a 3-column layout
-with col1:
-    st.subheader("Current Weather")
-    col1, col2, col3 = st.columns(3)
-    # First column
-    with col1:
-        custom_metric_box(label="Temperature", value="2 °C", delta="-3 °C")
-        custom_metric_box(label="Humidity", value="60 %", delta="-1 %")
-    # Second column
-    with col2:
-        custom_metric_box(label="Pressure", value="1010 hPa", delta="+2 hPa")
-        custom_metric_box(label="Precipitation", value="5 mm", delta="-1 mm")
-    # Third column
-    with col3:
-        custom_metric_box(label="Solar Radiation", value="200 W/m²", delta="-20 W/m²")
-        custom_metric_box(label="Wind Speed", value="15 km/h", delta="-2 km/h")
-    st.subheader("Current Pollution Levels")
-    col1, col2 = st.columns((1, 1))
-    # Display the prediction
-    # st.write(f'Predicted Pollution Level: {prediction[0]:.2f}')
-    with col1:
-        pollution_box(label="O<sub>3</sub>", value="37 µg/m³", delta="+2 µg/m³")
-    with col2:
-        pollution_box(label="NO<sub>2</sub>", value="28 µg/m³", delta="+3 µg/m³")
-# Sample data (replace with your actual data)
-# Sample data (replace with your actual data)
 dates_past = pd.date_range(end=pd.Timestamp.today(), periods=8).to_list()
 dates_future = pd.date_range(start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3).to_list()
@@ -79,11 +46,48 @@ dates = dates_past + dates_future
 # Create a DataFrame
 df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
-st.subheader("O3 and NO2 Prediction")
-# Create two columns for two separate graphs
-subcol1, subcol2 = st.columns(2)
-# Plot O3 in the first subcolumn
-with subcol1:
     fig_o3 = go.Figure()
     fig_o3.add_trace(
         go.Scatter(
@@ -92,9 +96,9 @@ with subcol1:
             mode="lines+markers",
             name="O3",
             line=dict(color="rgb(0, 191, 255)", width=4),
         )
-    )  # Bright blue
-    # Add a vertical line for predictions (today's date)
     fig_o3.add_shape(
         dict(
             type="line",
@@ -106,16 +110,23 @@ with subcol1:
         )
     )
     fig_o3.update_layout(
-        plot_bgcolor="rgba(0, 0, 0, 0)",  # Transparent background
-        paper_bgcolor="rgba(0, 0, 0, 0)",  # Transparent paper background
         yaxis_title="O3 Concentration (µg/m³)",
         font=dict(size=14),
-        hovermode="x unified",
     )
-    st.plotly_chart(fig_o3)
-# Plot NO2 in the second subcolumn
-with subcol2:
     fig_no2 = go.Figure()
     fig_no2.add_trace(
         go.Scatter(
@@ -125,8 +136,7 @@ with subcol2:
             name="NO2",
             line=dict(color="rgb(255, 20, 147)", width=4),
         )
-    )  # Bright pink
-    # Add a vertical line for predictions (today's date)
     fig_no2.add_shape(
         dict(
             type="line",
@@ -134,14 +144,23 @@ with subcol2:
             x1=pd.Timestamp.today(),
             y0=min(no2_values),
             y1=max(no2_values),
-            line=dict(color="White", width=3, dash="dash"),
         )
     )
     fig_no2.update_layout(
-        plot_bgcolor="rgba(0, 0, 0, 0)",  # Transparent background
-        paper_bgcolor="rgba(0, 0, 0, 0)",  # Transparent paper background
-        yaxis_title="NO2 Concentration (µg/m³)",
         font=dict(size=14),
-        hovermode="x unified",
     )
-    st.plotly_chart(fig_no2)

 st.set_page_config(
     page_title="Utrecht Pollution Dashboard",
+    page_icon="������🌱",
     layout="wide",
     initial_sidebar_state="expanded",
 )
 get_data()
 dataset = pd.read_csv("dataset.csv")
+today = dataset.iloc[-1]
+previous_day = dataset.iloc[-2]
 prediction = run_model("O3", data=dataset)
 pred1 = prediction[0][0]
 pred2 = prediction[0][1]
 pred3 = prediction[0][2]
 dates_past = pd.date_range(end=pd.Timestamp.today(), periods=8).to_list()
 dates_future = pd.date_range(start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3).to_list()
 # Create a DataFrame
 df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
+# App Title
+st.title("Utrecht Pollution Dashboard🌱")
+col1, col2 = st.columns((2, 3))
+# Create a two-column layout: weather metrics on the left, pollution levels and graphs on the right
+with col1:
+    st.subheader("Current Weather")
+    subcol1, subcol2 = st.columns((1, 1))
+    with subcol1:
+        custom_metric_box(label="Temperature", value=f"{round(today['mean_temp'] * 0.1)} °C", delta=f"{round(today['mean_temp'] * 0.1) - round(previous_day['mean_temp'] * 0.1)} °C")
+        custom_metric_box(label="Humidity", value=f"{round(today['humidity'])} %", delta=f"{round(today['humidity']) - round(previous_day['humidity'])} %")
+        custom_metric_box(label="Pressure", value=f"{round(today['pressure'] * 0.1)} hPa", delta=f"{round(today['pressure'] * 0.1) - round(previous_day['pressure'] * 0.1)} hPa")
+    with subcol2:
+        custom_metric_box(label="Precipitation", value=f"{round(today['percipitation'] * 0.1)} mm", delta=f"{round(today['percipitation'] * 0.1) - round(previous_day['percipitation'] * 0.1)} mm")
+        custom_metric_box(label="Solar Radiation", value=f"{round(today['global_radiation'])} J/m²", delta=f"{round(today['global_radiation']) - round(previous_day['global_radiation'])} J/m²")
+        custom_metric_box(label="Wind Speed", value=f"{round(today['wind_speed'] * 0.1, 1)} m/s", delta=f"{round(today['wind_speed'] * 0.1, 1) - round(previous_day['wind_speed'] * 0.1, 1)} m/s")
+with col2:
+    st.subheader("Current Pollution Levels")
+    sub1, sub2 = st.columns((1, 1))
+    # Display the prediction
+    # st.write(f'Predicted Pollution Level: {prediction[0]:.2f}')
+    with sub1:
+        pollution_box(label="O<sub>3</sub>", value=f"{round(today["O3"])} µg/m³", delta=f"{round(int(today["O3"]) - int(previous_day["O3"]))} µg/m³")
+        with st.expander("Learn more about O3", expanded=False):
+            st.markdown(
+                "*Ozone (O<sub>3</sub>)*: A harmful gas at ground level, contributing to respiratory issues and aggravating asthma.",
+                unsafe_allow_html=True,
+            )
+    with sub2:
+        pollution_box(label="NO<sub>2</sub>", value=f"{round(today["NO2"])} µg/m³", delta=f"{round(int(today["NO2"]) - int(previous_day["NO2"]))} µg/m³")
+        with st.expander("Learn more about O3", expanded=False):
+            st.markdown(
+                "*Wadeva particle (NO<sub>2</sub>)*: A harmful gas at ground level, contributing to respiratory issues and aggravating asthma.",
+                unsafe_allow_html=True,
+            )
+    # The O3 and NO2 graphs are drawn one below the other inside this column
+    # (no separate subcolumns are created for them)
+    st.subheader("O3 and NO2 Prediction")
+    # Plot O3 first; the NO2 graph follows below it
     fig_o3 = go.Figure()
     fig_o3.add_trace(
         go.Scatter(
             mode="lines+markers",
             name="O3",
             line=dict(color="rgb(0, 191, 255)", width=4),
+            hovertemplate="%{x|%d-%b-%Y}<br> %{y} µg/m³<extra></extra>",
         )
+    )
     fig_o3.add_shape(
         dict(
             type="line",
         )
     )
     fig_o3.update_layout(
+        plot_bgcolor="rgba(0, 0, 0, 0)",
+        paper_bgcolor="rgba(0, 0, 0, 0)",
         yaxis_title="O3 Concentration (µg/m³)",
         font=dict(size=14),
+        hovermode="x",
+        xaxis=dict(
+            title="Date",
+            type="date",
+            tickmode="array",
+            tickvals=df["Date"],
+            tickformat="%d-%b",
+            tickangle=-45,
+            tickcolor="gray",
+        ),
     )
+    st.plotly_chart(fig_o3, key="fig_o3")
     fig_no2 = go.Figure()
     fig_no2.add_trace(
         go.Scatter(
             name="NO2",
             line=dict(color="rgb(255, 20, 147)", width=4),
         )
+    )
     fig_no2.add_shape(
         dict(
             type="line",
             x1=pd.Timestamp.today(),
             y0=min(no2_values),
             y1=max(no2_values),
+            line=dict(color="gray", width=3, dash="dash"),
         )
     )
     fig_no2.update_layout(
+        plot_bgcolor="rgba(0, 0, 0, 0)",
+        paper_bgcolor="rgba(0, 0, 0, 0)",
+        yaxis_title="NO<sub>2</sub> Concentration (µg/m³)",
         font=dict(size=14),
+        hovermode="x",
+        xaxis=dict(
+            title="Date",
+            type="date",
+            tickmode="array",
+            tickvals=df["Date"],
+            tickformat="%d-%b",
+            tickangle=-45,
+            tickcolor="gray",
+        ),
     )
+    st.plotly_chart(fig_no2, key="fig_no2")

daily_api__pollution.py DELETED Viewed

File without changes

data_api_calls.py CHANGED Viewed

@@ -93,12 +93,11 @@ def add_columns():
     df.insert(2, 'O3', None)
     df.insert(10, 'weekday', None)
-    df.to_csv('combined_data.csv', index=False)
-def scale():
-    file_path = 'combined_data.csv'
-    df = pd.read_csv(file_path)
     columns = list(df.columns)
@@ -142,11 +141,10 @@ def scale():
     df['humidity'] = df['humidity'].astype(int)
     df['global_radiation'] = df['global_radiation'].astype(int)
-    df.to_csv('dataset.csv', index=False)
-def insert_pollution(NO2, O3):
-    file_path = 'dataset.csv'
-    df = pd.read_csv(file_path)
     start_index = 0
     while NO2:
         df.loc[start_index, 'NO2'] = NO2.pop()
@@ -186,8 +184,7 @@ def get_data():
     weather_data()
     pollution_data()
     NO2, O3 = clean_values()
-    add_columns()
-    scale()
-    insert_pollution(NO2, O3)
-    os.remove('combined_data.csv')
     os.remove('weather_data.csv')

     df.insert(2, 'O3', None)
     df.insert(10, 'weekday', None)
+    return df
+def scale(data):
+    df = data
     columns = list(df.columns)
     df['humidity'] = df['humidity'].astype(int)
     df['global_radiation'] = df['global_radiation'].astype(int)
+    return df
+def insert_pollution(NO2, O3, data):
+    df = data
     start_index = 0
     while NO2:
         df.loc[start_index, 'NO2'] = NO2.pop()
     weather_data()
     pollution_data()
     NO2, O3 = clean_values()
+    df = add_columns()
+    scaled_df = scale(df)
+    insert_pollution(NO2, O3, scaled_df)
     os.remove('weather_data.csv')

dataset.csv CHANGED Viewed

@@ -1,9 +1,9 @@
 date,NO2,O3,wind_speed,mean_temp,global_radiation,percipitation,pressure,minimum_visibility,humidity,weekday
-2024-10-15,22.853627569528417,22.52299076212471,51,87,71,0,10194,290,86,Tuesday
-2024-10-16,22.4144459833795,22.78109803921569,61,151,40,0,10103,358,82,Wednesday
-2024-10-17,22.990465489566613,22.928154311649017,51,169,43,6,10100,371,86,Thursday
-2024-10-18,23.659013539651834,23.700536672629696,21,156,42,39,10140,64,97,Friday
-2024-10-19,24.727853658536585,23.52574561403509,43,147,43,28,10140,236,92,Saturday
-2024-10-20,22.700366666666664,24.317572254335257,68,145,0,0,10160,241,82,Sunday
-2024-10-21,19.763439153439155,25.661659574468086,66,142,27,39,10201,110,90,Monday
-2024-10-22,20.281666666666666,25.787520661157025,76,121,54,97,10265,110,86,Tuesday

 date,NO2,O3,wind_speed,mean_temp,global_radiation,percipitation,pressure,minimum_visibility,humidity,weekday
+2024-10-16,22.602711656441716,22.88128805620609,61,151,40,0,10103,358,82,Wednesday
+2024-10-17,23.104327323162277,23.038637566137567,51,169,43,6,10100,371,86,Thursday
+2024-10-18,23.68285714285714,23.71661094224924,21,156,42,39,10140,64,97,Friday
+2024-10-19,24.532038834951457,23.604722719141325,43,147,43,28,10140,236,92,Saturday
+2024-10-20,23.019101941747575,24.173377192982453,68,145,0,0,10160,241,82,Sunday
+2024-10-21,21.275629139072848,25.05873563218391,58,144,27,43,10206,220,92,Monday
+2024-10-22,22.334374999999998,24.5942194092827,76,123,60,12,10265,100,87,Tuesday
+2024-10-23,24.261733333333336,23.56,31,115,7,0,10328,112,95,Wednesday

linear_regression_model.pkl DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:dbe290cfbb7bbd4766aba92ca738296536a79b435b9d9d51e0541d88340261dc
-size 593

src/daily_api__pollution.py DELETED Viewed

@@ -1,161 +0,0 @@
-import http.client
-from datetime import date, timedelta
-import pandas as pd
-from io import StringIO
-import os
-import re
-import csv
-def api_call():
-    particles = ["NO2", "O3"]
-    stations = ["NL10636", "NL10639", "NL10643"]
-    all_dataframes = []
-    today = date.today().isoformat() + "T09:00:00Z"
-    yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
-    latest_date = (date.today() - timedelta(7)).isoformat() + "T09:00:00Z"
-    days_today = 0
-    days_yesterday = 1
-    while(today != latest_date):
-        days_today += 1
-        days_yesterday += 1
-        for particle in particles:
-            for station in stations:
-                conn = http.client.HTTPSConnection("api.luchtmeetnet.nl")
-                payload = ''
-                headers = {}
-                conn.request("GET", f"/open_api/measurements?station_number={station}&formula={particle}&page=1&order_by=timestamp_measured&order_direction=desc&end={today}&start={yesterday}", payload, headers)
-                res = conn.getresponse()
-                data = res.read()
-                decoded_data = data.decode("utf-8")
-                df = pd.read_csv(StringIO(decoded_data))
-                df = df.filter(like='value')
-                all_dataframes.append(df)
-            combined_data = pd.concat(all_dataframes, ignore_index=True)
-            combined_data.to_csv(f'{particle}_{today}.csv', index=False)
-        today = (date.today() - timedelta(days_today)).isoformat() + "T09:00:00Z"
-        yesterday = (date.today() - timedelta(days_yesterday)).isoformat() + "T09:00:00Z"
-def delete_csv(csvs):
-    for csv in csvs:
-        if(os.path.exists(csv) and os.path.isfile(csv)):
-            os.remove(csv)
-def clean_values():
-    particles = ["NO2", "O3"]
-    csvs = []
-    NO2 = []
-    O3 = []
-    today = date.today().isoformat() + "T09:00:00Z"
-    yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
-    latest_date = (date.today() - timedelta(7)).isoformat() + "T09:00:00Z"
-    days_today = 0
-    while(today != latest_date):
-        for particle in particles:
-            name = f'{particle}_{today}.csv'
-            csvs.append(name)
-        days_today += 1
-        today = (date.today() - timedelta(days_today)).isoformat() + "T09:00:00Z"
-    for csv_file in csvs:
-        values = []  # Reset values for each CSV file
-        # Open the CSV file and read the values
-        with open(csv_file, 'r') as file:
-            reader = csv.reader(file)
-            for row in reader:
-                for value in row:
-                    # Use regular expressions to extract numeric part
-                    cleaned_value = re.findall(r"[-+]?\d*\.\d+|\d+", value)
-                    if cleaned_value:  # If we successfully extract a number
-                        values.append(float(cleaned_value[0]))  # Convert the first match to float
-        # Compute the average if the values list is not empty
-        if values:
-            avg = sum(values) / len(values)
-            if "NO2" in csv_file:
-                NO2.append(avg)
-            else:
-                O3.append(avg)
-    delete_csv(csvs)
-    return NO2, O3
-def add_columns():
-    file_path = 'weather_data.csv'
-    df = pd.read_csv(file_path)
-    df.insert(1, 'NO2', None)
-    df.insert(2, 'O3', None)
-    df.insert(10, 'weekday', None)
-    df.to_csv('combined_data.csv', index=False)
-def scale():
-    file_path = 'combined_data.csv'
-    df = pd.read_csv(file_path)
-    columns = list(df.columns)
-    columns.insert(3, columns.pop(6))
-    df = df[columns]
-    columns.insert(5, columns.pop(9))
-    df = df[columns]
-    columns.insert(9, columns.pop(6))
-    df = df[columns]
-    df = df.rename(columns={
-        'datetime':'date',
-        'windspeed': 'wind_speed',
-        'temp': 'mean_temp',
-        'solarradiation':'global_radiation',
-        'precip':'percipitation',
-        'sealevelpressure':'pressure',
-        'visibility':'minimum_visibility'
-    })
-    df['date'] = pd.to_datetime(df['date'])
-    df['weekday'] = df['date'].dt.day_name()
-    df['wind_speed'] = (df['wind_speed'] / 3.6) * 10
-    df['mean_temp'] = df['mean_temp'] * 10
-    df['minimum_visibility'] = df['minimum_visibility'] * 10
-    df['percipitation'] = df['percipitation'] * 10
-    df['pressure'] = df['pressure'] * 10
-    df['wind_speed'] = df['wind_speed'].astype(int)
-    df['mean_temp'] = df['mean_temp'].astype(int)
-    df['minimum_visibility'] = df['minimum_visibility'].astype(int)
-    df['percipitation'] = df['percipitation'].astype(int)
-    df['pressure'] = df['pressure'].astype(int)
-    df['humidity'] = df['humidity'].astype(int)
-    df['global_radiation'] = df['global_radiation'].astype(int)
-    df.to_csv('recorded_data.csv', index=False)
-def insert_pollution(NO2, O3):
-    file_path = 'recorded_data.csv'
-    df = pd.read_csv(file_path)
-    start_index = 0
-    while NO2:
-        df.loc[start_index, 'NO2'] = NO2.pop()
-        start_index += 1
-    start_index = 0
-    while O3:
-        df.loc[start_index, 'O3'] = O3.pop()
-        start_index += 1
-        df.to_csv('recorded_data.csv', index=False)
-api_call()
-NO2, O3 = clean_values()
-add_columns()
-scale()
-insert_pollution(NO2, O3)
-os.remove('combined_data.csv')
-os.remove('weather_data.csv')

src/helper_functions.py CHANGED Viewed

@@ -1,24 +1,26 @@
 import streamlit as st
-# Custom function to create styled metric boxes with subscripts, smaller label, and larger metric
 def custom_metric_box(label, value, delta):
     st.markdown(f"""
         <div style="
-            background: rgba(255, 255, 255, 0.05);
-            border-radius: 16px;
-            box-shadow: 0 4px 30px rgba(0, 0, 0, 0.1);
-            backdrop-filter: blur(6px);
-            -webkit-backdrop-filter: blur(6px);
-            border: 1px solid rgba(255, 255, 255, 0.15);
-            padding: 15px;
-            margin-bottom: 10px;
-            width: 200px;  /* Fixed width */
         ">
-            <h4 style="font-size: 18px; font-weight: normal; margin: 0;">{label}</h4>  <!-- Smaller label -->
-            <p style="font-size: 36px; font-weight: bold; margin: 0;">{value}</p>  <!-- Larger metric -->
-            <p style="color: {'green' if '+' in delta else 'orange'}; margin: 0;">{delta}</p>
         </div>
-        """, unsafe_allow_html=True)
 # Custom function to create pollution metric boxes with side-by-side layout for label and value
 # Custom function to create pollution metric boxes with side-by-side layout and fixed width

 import streamlit as st
+# Custom function to create styled metric boxes with compact layout
 def custom_metric_box(label, value, delta):
     st.markdown(f"""
         <div style="
+            padding: 5px;
+            margin-bottom: 5px;
+            width: 100%;  /* Full width */
+            display: flex;
+            flex-direction: column;  /* Align items vertically */
+            align-items: flex-start;  /* Align all content to the left */
         ">
+            <div>
+                <h4 style="font-size: 14px; font-weight: normal; margin: 0;">{label}</h4>  <!-- Smaller label -->
+            </div>
+            <div>
+                <p style="font-size: 18px; font-weight: bold; margin: 0;">{value}</p>  <!-- Smaller metric -->
+                <p style="color: {'green' if '+' in delta else 'orange'}; font-size: 12px; margin: 0;">{delta}</p> <!-- Smaller delta text -->
+            </div>
         </div>
+    """, unsafe_allow_html=True)
 # Custom function to create pollution metric boxes with side-by-side layout for label and value
 # Custom function to create pollution metric boxes with side-by-side layout and fixed width

test.ipynb DELETED Viewed

@@ -1,158 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": 1,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from data_loading import create_features_and_targets\n",
-    "from data_api_calls import get_data\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "dataset = pd.read_csv(\"dataset.csv\")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 3,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of rows with missing values dropped: 7\n"
-     ]
-    }
-   ],
-   "source": [
-    "test_data = create_features_and_targets(\n",
-    "    data=dataset,\n",
-    "    target_particle=\"NO2\",\n",
-    "    lag_days=7,\n",
-    "    sma_days=7,\n",
-    "    days_ahead=3,\n",
-    ")"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 11,
-   "metadata": {},
-   "outputs": [
-    {
-     "data": {
-      "text/plain": [
-       "Index(['NO2', 'O3', 'wind_speed', 'mean_temp', 'global_radiation',\n",
-       "       'percipitation', 'pressure', 'minimum_visibility', 'humidity',\n",
-       "       'weekday_sin',\n",
-       "       ...\n",
-       "       'O3_last_year_4_days_before', 'NO2_last_year_4_days_before',\n",
-       "       'O3_last_year_5_days_before', 'NO2_last_year_5_days_before',\n",
-       "       'O3_last_year_6_days_before', 'NO2_last_year_6_days_before',\n",
-       "       'O3_last_year_7_days_before', 'NO2_last_year_7_days_before',\n",
-       "       'O3_last_year_3_days_after', 'NO2_last_year_3_days_after'],\n",
-       "      dtype='object', length=103)"
-      ]
-     },
-     "execution_count": 11,
-     "metadata": {},
-     "output_type": "execute_result"
-    }
-   ],
-   "source": [
-    "test_data.columns"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 8,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from src.models_loading import run_model"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 12,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "2024-10-22 21:43:37.935 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
-      "2024-10-22 21:43:37.938 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
-      "2024-10-22 21:43:37.939 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
-      "2024-10-22 21:43:37.980 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
-      "2024-10-22 21:43:37.980 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Number of rows with missing values dropped: 7\n"
-     ]
-    },
-    {
-     "ename": "FileNotFoundError",
-     "evalue": "[Errno 2] No such file or directory: '../scalers/feature_scaler_O3.joblib'",
-     "output_type": "error",
-     "traceback": [
-      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m prediction \u001b[38;5;241m=\u001b[39m \u001b[43mrun_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:210\u001b[0m, in \u001b[0;36mCachedFunc.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    208\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mshow_spinner \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mshow_spinner, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m    209\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m spinner(message, _cache\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m--> 210\u001b[0m         \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_or_create_cached_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    211\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    212\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_or_create_cached_value(args, kwargs)\n",
-      "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:235\u001b[0m, in \u001b[0;36mCachedFunc._get_or_create_cached_value\u001b[0;34m(self, func_args, func_kwargs)\u001b[0m\n\u001b[1;32m    233\u001b[0m     cached_result \u001b[38;5;241m=\u001b[39m cache\u001b[38;5;241m.\u001b[39mread_result(value_key)\n\u001b[1;32m    234\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handle_cache_hit(cached_result)\n\u001b[0;32m--> 235\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_handle_cache_miss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcache\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue_key\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
-      "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:292\u001b[0m, in \u001b[0;36mCachedFunc._handle_cache_miss\u001b[0;34m(self, cache, value_key, func_args, func_kwargs)\u001b[0m\n\u001b[1;32m    288\u001b[0m \u001b[38;5;66;03m# We acquired the lock before any other thread. Compute the value!\u001b[39;00m\n\u001b[1;32m    289\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mcached_message_replay_ctx\u001b[38;5;241m.\u001b[39mcalling_cached_function(\n\u001b[1;32m    290\u001b[0m     \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mfunc\n\u001b[1;32m    291\u001b[0m ):\n\u001b[0;32m--> 292\u001b[0m     computed_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_info\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunc_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunc_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m    294\u001b[0m \u001b[38;5;66;03m# We've computed our value, and now we need to write it back to the cache\u001b[39;00m\n\u001b[1;32m    295\u001b[0m \u001b[38;5;66;03m# along with any \"replay messages\" that were generated during value computation.\u001b[39;00m\n\u001b[1;32m    296\u001b[0m messages \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mcached_message_replay_ctx\u001b[38;5;241m.\u001b[39m_most_recent_messages\n",
-      "File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/src/models_loading.py:28\u001b[0m, in \u001b[0;36mrun_model\u001b[0;34m(particle, data)\u001b[0m\n\u001b[1;32m     26\u001b[0m \u001b[38;5;129m@st\u001b[39m\u001b[38;5;241m.\u001b[39mcache_resource(ttl\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m6\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m300\u001b[39m)  \u001b[38;5;66;03m# Reruns every 6 hours\u001b[39;00m\n\u001b[1;32m     27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_model\u001b[39m(particle, data):\n\u001b[0;32m---> 28\u001b[0m     input_data \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparticle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m     29\u001b[0m     model \u001b[38;5;241m=\u001b[39m load_model(particle)\n\u001b[1;32m     31\u001b[0m     \u001b[38;5;66;03m# Run the model with static input\u001b[39;00m\n",
-      "File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/src/data_loading.py:125\u001b[0m, in \u001b[0;36mcreate_features\u001b[0;34m(data, target_particle, lag_days, sma_days)\u001b[0m\n\u001b[1;32m    121\u001b[0m x \u001b[38;5;241m=\u001b[39m data[feature_cols]\n\u001b[1;32m    124\u001b[0m \u001b[38;5;66;03m# Initialize scalers\u001b[39;00m\n\u001b[0;32m--> 125\u001b[0m feature_scaler \u001b[38;5;241m=\u001b[39m \u001b[43mjoblib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m../scalers/feature_scaler_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.joblib\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m    127\u001b[0m \u001b[38;5;66;03m# Fit the scalers on the training data\u001b[39;00m\n\u001b[1;32m    128\u001b[0m X_scaled \u001b[38;5;241m=\u001b[39m feature_scaler\u001b[38;5;241m.\u001b[39mfit_transform(x)\n",
-      "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/joblib/numpy_pickle.py:650\u001b[0m, in \u001b[0;36mload\u001b[0;34m(filename, mmap_mode)\u001b[0m\n\u001b[1;32m    648\u001b[0m         obj \u001b[38;5;241m=\u001b[39m _unpickle(fobj)\n\u001b[1;32m    649\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 650\u001b[0m     \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m    651\u001b[0m         \u001b[38;5;28;01mwith\u001b[39;00m _read_fileobject(f, filename, mmap_mode) \u001b[38;5;28;01mas\u001b[39;00m fobj:\n\u001b[1;32m    652\u001b[0m             \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(fobj, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m    653\u001b[0m                 \u001b[38;5;66;03m# if the returned file object is a string, this means we\u001b[39;00m\n\u001b[1;32m    654\u001b[0m                 \u001b[38;5;66;03m# try to load a pickle file generated with an version of\u001b[39;00m\n\u001b[1;32m    655\u001b[0m                 \u001b[38;5;66;03m# Joblib so we load it with joblib compatibility function.\u001b[39;00m\n",
-      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../scalers/feature_scaler_O3.joblib'"
-     ]
-    }
-   ],
-   "source": [
-    "prediction = run_model(\"O3\", data=dataset)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "ml-industry",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.5"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

test.py DELETED Viewed

@@ -1,7 +0,0 @@
-import pandas as pd
-from src.models_loading import run_model
-dataset = pd.read_csv("dataset.csv")
-prediction = run_model("O3", data=dataset)
-print(type(prediction))
-print(prediction)