elisaklunder
committed on
Commit
•
14fc71f
1
Parent(s):
4efcd8f
joblib scalers changed:
Browse files
- app.py +3 -1
- past_pollution_data.csv +12 -6
- past_weather_data.csv +5 -0
- pollution_data.csv +1 -0
- scalers/feature_scaler_NO2.joblib +0 -3
- scalers/feature_scaler_O3.joblib +0 -3
- scalers/target_scaler_NO2.joblib +0 -3
- scalers/target_scaler_O3.joblib +0 -3
- src/data_api_calls.py +0 -2
- src/features_pipeline.py +11 -1
- src/predict.py +41 -9
- weather_data.csv +5 -0
app.py
CHANGED
@@ -4,7 +4,7 @@ import plotly.graph_objects as go
|
|
4 |
import streamlit as st
|
5 |
|
6 |
from src.helper_functions import custom_metric_box, pollution_box
|
7 |
-
from src.predict import get_data_and_predictions
|
8 |
|
9 |
st.set_page_config(
|
10 |
page_title="Utrecht Pollution Dashboard ",
|
@@ -15,6 +15,8 @@ st.set_page_config(
|
|
15 |
|
16 |
alt.themes.enable("dark")
|
17 |
|
|
|
|
|
18 |
week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()
|
19 |
|
20 |
today = week_data.iloc[-1]
|
|
|
4 |
import streamlit as st
|
5 |
|
6 |
from src.helper_functions import custom_metric_box, pollution_box
|
7 |
+
from src.predict import get_data_and_predictions, update_data_and_predictions
|
8 |
|
9 |
st.set_page_config(
|
10 |
page_title="Utrecht Pollution Dashboard ",
|
|
|
15 |
|
16 |
alt.themes.enable("dark")
|
17 |
|
18 |
+
update_data_and_predictions()
|
19 |
+
|
20 |
week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()
|
21 |
|
22 |
today = week_data.iloc[-1]
|
past_pollution_data.csv
CHANGED
@@ -1,12 +1,18 @@
|
|
1 |
date,NO2,O3
|
2 |
-
2023-10-18,10.
|
3 |
2023-10-19,17.97026666666666,31.779024390243908
|
4 |
2023-10-20,17.233055555555563,18.7156
|
5 |
-
2023-10-21,15.
|
6 |
2023-10-22,8.723378378378372,48.33439999999999
|
7 |
-
2023-10-23,20.
|
8 |
-
2023-10-24,15.
|
9 |
-
2023-10-25,22.
|
10 |
-
2023-10-26,21.
|
11 |
2023-10-27,23.07226666666666,16.15416666666666
|
12 |
2023-10-28,24.89121621621622,24.59040816326531
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
date,NO2,O3
|
2 |
+
2023-10-18,10.8427027027027,39.81260000000001
|
3 |
2023-10-19,17.97026666666666,31.779024390243908
|
4 |
2023-10-20,17.233055555555563,18.7156
|
5 |
+
2023-10-21,15.023599999999991,22.04
|
6 |
2023-10-22,8.723378378378372,48.33439999999999
|
7 |
+
2023-10-23,20.63426666666668,15.586000000000002
|
8 |
+
2023-10-24,15.1156,24.62808510638297
|
9 |
+
2023-10-25,22.88567567567568,27.117599999999992
|
10 |
+
2023-10-26,21.53175675675676,13.3216
|
11 |
2023-10-27,23.07226666666666,16.15416666666666
|
12 |
2023-10-28,24.89121621621622,24.59040816326531
|
13 |
+
2023-10-29,9.724428571428573,51.525200000000005
|
14 |
+
2023-10-30,11.20205479452055,52.820600000000006
|
15 |
+
2023-10-31,17.494666666666667,44.458541666666655
|
16 |
+
2023-11-01,21.588095238095235,29.20631578947369
|
17 |
+
2023-11-02,9.745714285714286,48.39760869565216
|
18 |
+
2023-11-03,7.163243243243242,61.421599999999984
|
past_weather_data.csv
CHANGED
@@ -11,3 +11,8 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
|
|
11 |
2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
|
12 |
2023-10-27,10.6,97.9,11.4,14.8,992.0,9.5,20.5
|
13 |
2023-10-28,11.4,88.6,3,18.4,994.4,29.3,48.5
|
|
|
|
|
|
|
|
|
|
|
|
11 |
2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
|
12 |
2023-10-27,10.6,97.9,11.4,14.8,992.0,9.5,20.5
|
13 |
2023-10-28,11.4,88.6,3,18.4,994.4,29.3,48.5
|
14 |
+
2023-10-29,13,82.2,9.5,31.7,991.5,38.8,35.4
|
15 |
+
2023-10-30,11.2,90.4,13,18.4,997.5,28.8,27
|
16 |
+
2023-10-31,11,93.7,18.6,18,1000.7,17.9,29.8
|
17 |
+
2023-11-01,12.4,88.5,4.9,25.9,997.8,32.6,31.5
|
18 |
+
2023-11-02,11,80,8.7,46.4,976.4,33.6,21.5
|
pollution_data.csv
CHANGED
@@ -8,3 +8,4 @@ date,NO2,O3
|
|
8 |
2024-10-23,21.974793814433,22.21468879668051
|
9 |
2024-10-24,25.51256756756757,20.91370967741937
|
10 |
2024-10-25,21.72051282051282,22.33230769230769
|
|
|
|
8 |
2024-10-23,21.974793814433,22.21468879668051
|
9 |
2024-10-24,25.51256756756757,20.91370967741937
|
10 |
2024-10-25,21.72051282051282,22.33230769230769
|
11 |
+
2024-10-30,22.26162162162162,18.034435483870976
|
scalers/feature_scaler_NO2.joblib
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:4d2731707963125bbb452df55c91920c62eb745c7e78c0a79bdf7fab173b3369
|
3 |
-
size 5791
|
|
|
|
|
|
|
|
scalers/feature_scaler_O3.joblib
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:d6e30fc2c7ce7a00bc1b8db08e5f4ffa110136a796f55a68beedb479b07189f7
|
3 |
-
size 5023
|
|
|
|
|
|
|
|
scalers/target_scaler_NO2.joblib
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:255a0d1dd1d8673ce03e838e9fc1a7df4dab1248ca70f6cb73b66aea83ed6316
|
3 |
-
size 1023
|
|
|
|
|
|
|
|
scalers/target_scaler_O3.joblib
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:2ad485897b59228f1c1efd8c76cc2fa771d10efd379297f163ceba32dbacbab6
|
3 |
-
size 1023
|
|
|
|
|
|
|
|
src/data_api_calls.py
CHANGED
@@ -114,8 +114,6 @@ def update_pollution_data():
|
|
114 |
|
115 |
|
116 |
def get_combined_data():
|
117 |
-
update_weather_data()
|
118 |
-
update_pollution_data()
|
119 |
|
120 |
weather_df = pd.read_csv(WEATHER_DATA_FILE)
|
121 |
|
|
|
114 |
|
115 |
|
116 |
def get_combined_data():
|
|
|
|
|
117 |
|
118 |
weather_df = pd.read_csv(WEATHER_DATA_FILE)
|
119 |
|
src/features_pipeline.py
CHANGED
@@ -1,12 +1,19 @@
|
|
|
|
1 |
import warnings
|
2 |
|
3 |
import joblib
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
|
|
|
|
|
|
6 |
from src.past_data_api_calls import get_past_combined_data
|
7 |
|
8 |
warnings.filterwarnings("ignore")
|
9 |
|
|
|
|
|
|
|
10 |
|
11 |
def create_features(
|
12 |
data,
|
@@ -91,7 +98,10 @@ def create_features(
|
|
91 |
x = data[feature_cols]
|
92 |
|
93 |
# Scale
|
94 |
-
|
|
|
|
|
|
|
95 |
X_scaled = feature_scaler.transform(x)
|
96 |
|
97 |
# Convert scaled data back to DataFrame for consistency
|
|
|
1 |
+
import os
|
2 |
import warnings
|
3 |
|
4 |
import joblib
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
7 |
+
from dotenv import load_dotenv
|
8 |
+
from huggingface_hub import hf_hub_download, login
|
9 |
+
|
10 |
from src.past_data_api_calls import get_past_combined_data
|
11 |
|
12 |
warnings.filterwarnings("ignore")
|
13 |
|
14 |
+
load_dotenv()
|
15 |
+
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
16 |
+
|
17 |
|
18 |
def create_features(
|
19 |
data,
|
|
|
98 |
x = data[feature_cols]
|
99 |
|
100 |
# Scale
|
101 |
+
repo_id = f"elisaklunder/Utrecht-{target_particle}-Forecasting-Model"
|
102 |
+
file_name = f"feature_scaler_{target_particle}.joblib"
|
103 |
+
path = hf_hub_download(repo_id=repo_id, filename=file_name)
|
104 |
+
feature_scaler = joblib.load(path)
|
105 |
X_scaled = feature_scaler.transform(x)
|
106 |
|
107 |
# Convert scaled data back to DataFrame for consistency
|
src/predict.py
CHANGED
@@ -1,19 +1,23 @@
|
|
1 |
import os
|
2 |
-
from datetime import date, timedelta
|
3 |
|
4 |
import joblib
|
5 |
import pandas as pd
|
6 |
from dotenv import load_dotenv
|
7 |
from huggingface_hub import hf_hub_download, login
|
8 |
|
9 |
-
from src.data_api_calls import get_combined_data
|
|
|
|
|
|
|
|
|
10 |
from src.features_pipeline import create_features
|
11 |
|
|
|
|
|
12 |
|
13 |
-
def load_model(particle):
|
14 |
-
load_dotenv()
|
15 |
-
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
16 |
|
|
|
17 |
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
18 |
if particle == "O3":
|
19 |
file_name = "O3_svr_model.pkl"
|
@@ -29,13 +33,19 @@ def run_model(particle, data):
|
|
29 |
input_data = create_features(data=data, target_particle=particle)
|
30 |
model = load_model(particle)
|
31 |
prediction = model.predict(input_data)
|
32 |
-
|
|
|
|
|
|
|
|
|
33 |
prediction = target_scaler.inverse_transform(prediction)
|
|
|
34 |
return prediction
|
35 |
|
36 |
|
37 |
-
def get_data_and_predictions():
|
38 |
-
|
|
|
39 |
|
40 |
week_data = get_combined_data()
|
41 |
|
@@ -63,6 +73,8 @@ def get_data_and_predictions():
|
|
63 |
|
64 |
predictions_df = pd.DataFrame(prediction_data)
|
65 |
|
|
|
|
|
66 |
if os.path.exists(PREDICTIONS_FILE):
|
67 |
existing_data = pd.read_csv(PREDICTIONS_FILE)
|
68 |
# Filter out predictions made today to avoid duplicates
|
@@ -70,8 +82,28 @@ def get_data_and_predictions():
|
|
70 |
~(existing_data["date_predicted"] == str(date.today()))
|
71 |
]
|
72 |
combined_data = pd.concat([existing_data, predictions_df])
|
|
|
73 |
else:
|
74 |
combined_data = predictions_df
|
75 |
|
76 |
combined_data.to_csv(PREDICTIONS_FILE, index=False)
|
77 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
from datetime import date, datetime, timedelta
|
3 |
|
4 |
import joblib
|
5 |
import pandas as pd
|
6 |
from dotenv import load_dotenv
|
7 |
from huggingface_hub import hf_hub_download, login
|
8 |
|
9 |
+
from src.data_api_calls import (
|
10 |
+
get_combined_data,
|
11 |
+
update_pollution_data,
|
12 |
+
update_weather_data,
|
13 |
+
)
|
14 |
from src.features_pipeline import create_features
|
15 |
|
16 |
+
load_dotenv()
|
17 |
+
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
18 |
|
|
|
|
|
|
|
19 |
|
20 |
+
def load_model(particle):
|
21 |
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
22 |
if particle == "O3":
|
23 |
file_name = "O3_svr_model.pkl"
|
|
|
33 |
input_data = create_features(data=data, target_particle=particle)
|
34 |
model = load_model(particle)
|
35 |
prediction = model.predict(input_data)
|
36 |
+
|
37 |
+
repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
|
38 |
+
file_name = f"target_scaler_{particle}.joblib"
|
39 |
+
path = hf_hub_download(repo_id=repo_id, filename=file_name)
|
40 |
+
target_scaler = joblib.load(path)
|
41 |
prediction = target_scaler.inverse_transform(prediction)
|
42 |
+
|
43 |
return prediction
|
44 |
|
45 |
|
46 |
+
def update_data_and_predictions():
|
47 |
+
update_weather_data()
|
48 |
+
update_pollution_data()
|
49 |
|
50 |
week_data = get_combined_data()
|
51 |
|
|
|
73 |
|
74 |
predictions_df = pd.DataFrame(prediction_data)
|
75 |
|
76 |
+
PREDICTIONS_FILE = "predictions_history.csv"
|
77 |
+
|
78 |
if os.path.exists(PREDICTIONS_FILE):
|
79 |
existing_data = pd.read_csv(PREDICTIONS_FILE)
|
80 |
# Filter out predictions made today to avoid duplicates
|
|
|
82 |
~(existing_data["date_predicted"] == str(date.today()))
|
83 |
]
|
84 |
combined_data = pd.concat([existing_data, predictions_df])
|
85 |
+
combined_data.drop_duplicates()
|
86 |
else:
|
87 |
combined_data = predictions_df
|
88 |
|
89 |
combined_data.to_csv(PREDICTIONS_FILE, index=False)
|
90 |
+
|
91 |
+
|
92 |
+
def get_data_and_predictions():
|
93 |
+
week_data = get_combined_data()
|
94 |
+
|
95 |
+
PREDICTIONS_FILE = "predictions_history.csv"
|
96 |
+
data = pd.read_csv(PREDICTIONS_FILE)
|
97 |
+
|
98 |
+
today = datetime.today().strftime("%Y-%m-%d")
|
99 |
+
today_predictions = data[(data["date_predicted"] == today)]
|
100 |
+
|
101 |
+
# Extract predictions for O3 and NO2
|
102 |
+
o3_predictions = today_predictions[today_predictions["pollutant"] == "O3"][
|
103 |
+
"prediction_value"
|
104 |
+
].values
|
105 |
+
no2_predictions = today_predictions[today_predictions["pollutant"] == "NO2"][
|
106 |
+
"prediction_value"
|
107 |
+
].values
|
108 |
+
|
109 |
+
return week_data, [o3_predictions], [no2_predictions]
|
weather_data.csv
CHANGED
@@ -8,3 +8,8 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
|
|
8 |
2024-10-23,11.2,97.3,0.0,13.0,1032.8,6.5,12.5
|
9 |
2024-10-24,10.4,94.0,0.0,20.5,1024.7,13.0,62.5
|
10 |
2024-10-25,13.6,92.2,0.5,11.9,1016.8,24.0,93.0
|
|
|
|
|
|
|
|
|
|
|
|
8 |
2024-10-23,11.2,97.3,0.0,13.0,1032.8,6.5,12.5
|
9 |
2024-10-24,10.4,94.0,0.0,20.5,1024.7,13.0,62.5
|
10 |
2024-10-25,13.6,92.2,0.5,11.9,1016.8,24.0,93.0
|
11 |
+
2024-10-26,13.7,91.5,0,11.9,1016.3,23.3,8
|
12 |
+
2024-10-27,12,90.9,0,13.7,1019.6,23.7,28.6
|
13 |
+
2024-10-28,10.5,92.8,1.7,19.4,1022.7,24,28.2
|
14 |
+
2024-10-29,13.8,95.9,0.2,20.5,1023.1,8.1,16
|
15 |
+
2024-10-30,12.7,92.5,0.6,9.4,1027.5,13.7,32.9
|