elisaklunder committed on
Commit
14fc71f
1 Parent(s): 4efcd8f

joblib scalers changed

Browse files
app.py CHANGED
@@ -4,7 +4,7 @@ import plotly.graph_objects as go
4
  import streamlit as st
5
 
6
  from src.helper_functions import custom_metric_box, pollution_box
7
- from src.predict import get_data_and_predictions
8
 
9
  st.set_page_config(
10
  page_title="Utrecht Pollution Dashboard ",
@@ -15,6 +15,8 @@ st.set_page_config(
15
 
16
  alt.themes.enable("dark")
17
 
 
 
18
  week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()
19
 
20
  today = week_data.iloc[-1]
 
4
  import streamlit as st
5
 
6
  from src.helper_functions import custom_metric_box, pollution_box
7
+ from src.predict import get_data_and_predictions, update_data_and_predictions
8
 
9
  st.set_page_config(
10
  page_title="Utrecht Pollution Dashboard ",
 
15
 
16
  alt.themes.enable("dark")
17
 
18
+ update_data_and_predictions()
19
+
20
  week_data, predictions_O3, predictions_NO2 = get_data_and_predictions()
21
 
22
  today = week_data.iloc[-1]
past_pollution_data.csv CHANGED
@@ -1,12 +1,18 @@
1
  date,NO2,O3
2
- 2023-10-18,10.842702702702699,39.81260000000001
3
  2023-10-19,17.97026666666666,31.779024390243908
4
  2023-10-20,17.233055555555563,18.7156
5
- 2023-10-21,15.023599999999993,22.04
6
  2023-10-22,8.723378378378372,48.33439999999999
7
- 2023-10-23,20.634266666666676,15.586000000000002
8
- 2023-10-24,15.115599999999999,24.628085106382972
9
- 2023-10-25,22.885675675675678,27.117599999999992
10
- 2023-10-26,21.531756756756756,13.3216
11
  2023-10-27,23.07226666666666,16.15416666666666
12
  2023-10-28,24.89121621621622,24.59040816326531
 
 
 
 
 
 
 
1
  date,NO2,O3
2
+ 2023-10-18,10.8427027027027,39.81260000000001
3
  2023-10-19,17.97026666666666,31.779024390243908
4
  2023-10-20,17.233055555555563,18.7156
5
+ 2023-10-21,15.023599999999991,22.04
6
  2023-10-22,8.723378378378372,48.33439999999999
7
+ 2023-10-23,20.63426666666668,15.586000000000002
8
+ 2023-10-24,15.1156,24.62808510638297
9
+ 2023-10-25,22.88567567567568,27.117599999999992
10
+ 2023-10-26,21.53175675675676,13.3216
11
  2023-10-27,23.07226666666666,16.15416666666666
12
  2023-10-28,24.89121621621622,24.59040816326531
13
+ 2023-10-29,9.724428571428573,51.525200000000005
14
+ 2023-10-30,11.20205479452055,52.820600000000006
15
+ 2023-10-31,17.494666666666667,44.458541666666655
16
+ 2023-11-01,21.588095238095235,29.20631578947369
17
+ 2023-11-02,9.745714285714286,48.39760869565216
18
+ 2023-11-03,7.163243243243242,61.421599999999984
past_weather_data.csv CHANGED
@@ -11,3 +11,8 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
11
  2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
12
  2023-10-27,10.6,97.9,11.4,14.8,992.0,9.5,20.5
13
  2023-10-28,11.4,88.6,3,18.4,994.4,29.3,48.5
 
 
 
 
 
 
11
  2023-10-26,9.4,97.6,0.1,11.2,995.6,4.8,36.0
12
  2023-10-27,10.6,97.9,11.4,14.8,992.0,9.5,20.5
13
  2023-10-28,11.4,88.6,3,18.4,994.4,29.3,48.5
14
+ 2023-10-29,13,82.2,9.5,31.7,991.5,38.8,35.4
15
+ 2023-10-30,11.2,90.4,13,18.4,997.5,28.8,27
16
+ 2023-10-31,11,93.7,18.6,18,1000.7,17.9,29.8
17
+ 2023-11-01,12.4,88.5,4.9,25.9,997.8,32.6,31.5
18
+ 2023-11-02,11,80,8.7,46.4,976.4,33.6,21.5
pollution_data.csv CHANGED
@@ -8,3 +8,4 @@ date,NO2,O3
8
  2024-10-23,21.974793814433,22.21468879668051
9
  2024-10-24,25.51256756756757,20.91370967741937
10
  2024-10-25,21.72051282051282,22.33230769230769
 
 
8
  2024-10-23,21.974793814433,22.21468879668051
9
  2024-10-24,25.51256756756757,20.91370967741937
10
  2024-10-25,21.72051282051282,22.33230769230769
11
+ 2024-10-30,22.26162162162162,18.034435483870976
scalers/feature_scaler_NO2.joblib DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:4d2731707963125bbb452df55c91920c62eb745c7e78c0a79bdf7fab173b3369
3
- size 5791
 
 
 
 
scalers/feature_scaler_O3.joblib DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6e30fc2c7ce7a00bc1b8db08e5f4ffa110136a796f55a68beedb479b07189f7
3
- size 5023
 
 
 
 
scalers/target_scaler_NO2.joblib DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:255a0d1dd1d8673ce03e838e9fc1a7df4dab1248ca70f6cb73b66aea83ed6316
3
- size 1023
 
 
 
 
scalers/target_scaler_O3.joblib DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ad485897b59228f1c1efd8c76cc2fa771d10efd379297f163ceba32dbacbab6
3
- size 1023
 
 
 
 
src/data_api_calls.py CHANGED
@@ -114,8 +114,6 @@ def update_pollution_data():
114
 
115
 
116
  def get_combined_data():
117
- update_weather_data()
118
- update_pollution_data()
119
 
120
  weather_df = pd.read_csv(WEATHER_DATA_FILE)
121
 
 
114
 
115
 
116
  def get_combined_data():
 
 
117
 
118
  weather_df = pd.read_csv(WEATHER_DATA_FILE)
119
 
src/features_pipeline.py CHANGED
@@ -1,12 +1,19 @@
 
1
  import warnings
2
 
3
  import joblib
4
  import numpy as np
5
  import pandas as pd
 
 
 
6
  from src.past_data_api_calls import get_past_combined_data
7
 
8
  warnings.filterwarnings("ignore")
9
 
 
 
 
10
 
11
  def create_features(
12
  data,
@@ -91,7 +98,10 @@ def create_features(
91
  x = data[feature_cols]
92
 
93
  # Scale
94
- feature_scaler = joblib.load(f"scalers/feature_scaler_{target_particle}.joblib")
 
 
 
95
  X_scaled = feature_scaler.transform(x)
96
 
97
  # Convert scaled data back to DataFrame for consistency
 
1
+ import os
2
  import warnings
3
 
4
  import joblib
5
  import numpy as np
6
  import pandas as pd
7
+ from dotenv import load_dotenv
8
+ from huggingface_hub import hf_hub_download, login
9
+
10
  from src.past_data_api_calls import get_past_combined_data
11
 
12
  warnings.filterwarnings("ignore")
13
 
14
+ load_dotenv()
15
+ login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
16
+
17
 
18
  def create_features(
19
  data,
 
98
  x = data[feature_cols]
99
 
100
  # Scale
101
+ repo_id = f"elisaklunder/Utrecht-{target_particle}-Forecasting-Model"
102
+ file_name = f"feature_scaler_{target_particle}.joblib"
103
+ path = hf_hub_download(repo_id=repo_id, filename=file_name)
104
+ feature_scaler = joblib.load(path)
105
  X_scaled = feature_scaler.transform(x)
106
 
107
  # Convert scaled data back to DataFrame for consistency
src/predict.py CHANGED
@@ -1,19 +1,23 @@
1
  import os
2
- from datetime import date, timedelta
3
 
4
  import joblib
5
  import pandas as pd
6
  from dotenv import load_dotenv
7
  from huggingface_hub import hf_hub_download, login
8
 
9
- from src.data_api_calls import get_combined_data
 
 
 
 
10
  from src.features_pipeline import create_features
11
 
 
 
12
 
13
- def load_model(particle):
14
- load_dotenv()
15
- login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
16
 
 
17
  repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
18
  if particle == "O3":
19
  file_name = "O3_svr_model.pkl"
@@ -29,13 +33,19 @@ def run_model(particle, data):
29
  input_data = create_features(data=data, target_particle=particle)
30
  model = load_model(particle)
31
  prediction = model.predict(input_data)
32
- target_scaler = joblib.load(f"scalers/target_scaler_{particle}.joblib")
 
 
 
 
33
  prediction = target_scaler.inverse_transform(prediction)
 
34
  return prediction
35
 
36
 
37
- def get_data_and_predictions():
38
- PREDICTIONS_FILE = "predictions_history.csv"
 
39
 
40
  week_data = get_combined_data()
41
 
@@ -63,6 +73,8 @@ def get_data_and_predictions():
63
 
64
  predictions_df = pd.DataFrame(prediction_data)
65
 
 
 
66
  if os.path.exists(PREDICTIONS_FILE):
67
  existing_data = pd.read_csv(PREDICTIONS_FILE)
68
  # Filter out predictions made today to avoid duplicates
@@ -70,8 +82,28 @@ def get_data_and_predictions():
70
  ~(existing_data["date_predicted"] == str(date.today()))
71
  ]
72
  combined_data = pd.concat([existing_data, predictions_df])
 
73
  else:
74
  combined_data = predictions_df
75
 
76
  combined_data.to_csv(PREDICTIONS_FILE, index=False)
77
- return week_data, o3_predictions, no2_predictions
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
+ from datetime import date, datetime, timedelta
3
 
4
  import joblib
5
  import pandas as pd
6
  from dotenv import load_dotenv
7
  from huggingface_hub import hf_hub_download, login
8
 
9
+ from src.data_api_calls import (
10
+ get_combined_data,
11
+ update_pollution_data,
12
+ update_weather_data,
13
+ )
14
  from src.features_pipeline import create_features
15
 
16
+ load_dotenv()
17
+ login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
18
 
 
 
 
19
 
20
+ def load_model(particle):
21
  repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
22
  if particle == "O3":
23
  file_name = "O3_svr_model.pkl"
 
33
  input_data = create_features(data=data, target_particle=particle)
34
  model = load_model(particle)
35
  prediction = model.predict(input_data)
36
+
37
+ repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
38
+ file_name = f"target_scaler_{particle}.joblib"
39
+ path = hf_hub_download(repo_id=repo_id, filename=file_name)
40
+ target_scaler = joblib.load(path)
41
  prediction = target_scaler.inverse_transform(prediction)
42
+
43
  return prediction
44
 
45
 
46
+ def update_data_and_predictions():
47
+ update_weather_data()
48
+ update_pollution_data()
49
 
50
  week_data = get_combined_data()
51
 
 
73
 
74
  predictions_df = pd.DataFrame(prediction_data)
75
 
76
+ PREDICTIONS_FILE = "predictions_history.csv"
77
+
78
  if os.path.exists(PREDICTIONS_FILE):
79
  existing_data = pd.read_csv(PREDICTIONS_FILE)
80
  # Filter out predictions made today to avoid duplicates
 
82
  ~(existing_data["date_predicted"] == str(date.today()))
83
  ]
84
  combined_data = pd.concat([existing_data, predictions_df])
85
+ combined_data.drop_duplicates()
86
  else:
87
  combined_data = predictions_df
88
 
89
  combined_data.to_csv(PREDICTIONS_FILE, index=False)
90
+
91
+
92
+ def get_data_and_predictions():
93
+ week_data = get_combined_data()
94
+
95
+ PREDICTIONS_FILE = "predictions_history.csv"
96
+ data = pd.read_csv(PREDICTIONS_FILE)
97
+
98
+ today = datetime.today().strftime("%Y-%m-%d")
99
+ today_predictions = data[(data["date_predicted"] == today)]
100
+
101
+ # Extract predictions for O3 and NO2
102
+ o3_predictions = today_predictions[today_predictions["pollutant"] == "O3"][
103
+ "prediction_value"
104
+ ].values
105
+ no2_predictions = today_predictions[today_predictions["pollutant"] == "NO2"][
106
+ "prediction_value"
107
+ ].values
108
+
109
+ return week_data, [o3_predictions], [no2_predictions]
weather_data.csv CHANGED
@@ -8,3 +8,8 @@ date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
8
  2024-10-23,11.2,97.3,0.0,13.0,1032.8,6.5,12.5
9
  2024-10-24,10.4,94.0,0.0,20.5,1024.7,13.0,62.5
10
  2024-10-25,13.6,92.2,0.5,11.9,1016.8,24.0,93.0
 
 
 
 
 
 
8
  2024-10-23,11.2,97.3,0.0,13.0,1032.8,6.5,12.5
9
  2024-10-24,10.4,94.0,0.0,20.5,1024.7,13.0,62.5
10
  2024-10-25,13.6,92.2,0.5,11.9,1016.8,24.0,93.0
11
+ 2024-10-26,13.7,91.5,0,11.9,1016.3,23.3,8
12
+ 2024-10-27,12,90.9,0,13.7,1019.6,23.7,28.6
13
+ 2024-10-28,10.5,92.8,1.7,19.4,1022.7,24,28.2
14
+ 2024-10-29,13.8,95.9,0.2,20.5,1023.1,8.1,16
15
+ 2024-10-30,12.7,92.5,0.6,9.4,1027.5,13.7,32.9