app.py gets the data and runs the model; last-year features still to be implemented

- __pycache__/data_api_calls.cpython-312.pyc +0 -0
- app.py +19 -15
- dataset.csv +1 -1
- requirements.txt +1 -0
- scalers/feature_scaler_NO2.joblib +3 -0
- scalers/feature_scaler_O3.joblib +3 -0
- src/data_loading.py +4 -81
- src/models_loading.py +5 -7
- test.ipynb +67 -97
- test.py +4 -10
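
Taken together, the hunks below wire the dashboard to live data: get_data() refreshes the measurements, app.py reads dataset.csv, and run_model() returns predictions mapped back to the original scale. A condensed sketch of the new top-level flow (variable names are taken from the app.py hunk; chart construction is omitted):

    import pandas as pd

    from data_api_calls import get_data
    from src.models_loading import run_model

    get_data()                                  # assumed to refresh dataset.csv via the data APIs
    dataset = pd.read_csv("dataset.csv")
    prediction = run_model("O3", data=dataset)  # 2D array, one row of three-day-ahead values

    # Past values come straight from the dataset, future values from the model
    o3_past_values = dataset["O3"]
    o3_future_values = pd.Series(prediction[0].flatten())
    o3_values = pd.concat([o3_past_values, o3_future_values], ignore_index=True)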
__pycache__/data_api_calls.cpython-312.pyc
CHANGED
Binary files a/__pycache__/data_api_calls.cpython-312.pyc and b/__pycache__/data_api_calls.cpython-312.pyc differ
app.py
CHANGED
@@ -2,9 +2,10 @@ import altair as alt
 import pandas as pd
 import plotly.graph_objects as go
 import streamlit as st
+
+from data_api_calls import get_data
 from src.helper_functions import custom_metric_box, pollution_box
 from src.models_loading import run_model
-from data_api_calls import get_data
 
 st.set_page_config(
     page_title="Utrecht Pollution Dashboard",
@@ -15,10 +16,12 @@ st.set_page_config(
 
 alt.themes.enable("dark")
 
-test_predictions = run_model("O3")
 get_data()
-
-
+dataset = pd.read_csv("dataset.csv")
+prediction = run_model("O3", data=dataset)
+pred1 = prediction[0][0]
+pred2 = prediction[0][1]
+pred3 = prediction[0][2]
 
 # App Title
 st.title("Utrecht Pollution Dashboard🌱")
@@ -54,23 +57,24 @@ with col1:
     pollution_box(label="NO<sub>2</sub>", value="28 µg/m³", delta="+3 µg/m³")
 
 # Sample data (replace with your actual data)
-
-
-
-).to_list()
+# Sample data (replace with your actual data)
+dates_past = pd.date_range(end=pd.Timestamp.today(), periods=8).to_list()
+dates_future = pd.date_range(start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3).to_list()
 
 # O3 and NO2 values for the past 7 days
-o3_past_values = [
-no2_past_values = [
+o3_past_values = dataset["O3"]
+no2_past_values = dataset["NO2"]
+
+# Predicted O3 and NO2 values for the next 3 days (convert to pandas Series)
+o3_future_values = pd.Series(prediction[0].flatten())  # Flatten the array to 1D
+no2_future_values = pd.Series([26, 27, 28])  # Example prediction data
 
-#
-
-
+# Combine the past and future values using pd.concat
+o3_values = pd.concat([o3_past_values, o3_future_values], ignore_index=True)
+no2_values = pd.concat([no2_past_values, no2_future_values], ignore_index=True)
 
 # Combine dates and values
 dates = dates_past + dates_future
-o3_values = o3_past_values + o3_future_values
-no2_values = no2_past_values + no2_future_values
 
 # Create a DataFrame
 df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
dataset.csv
CHANGED
@@ -6,4 +6,4 @@ date,NO2,O3,wind_speed,mean_temp,global_radiation,percipitation,pressure,minimum
 2024-10-19,24.727853658536585,23.52574561403509,43,147,43,28,10140,236,92,Saturday
 2024-10-20,22.700366666666664,24.317572254335257,68,145,0,0,10160,241,82,Sunday
 2024-10-21,19.763439153439155,25.661659574468086,66,142,27,39,10201,110,90,Monday
-2024-10-22,20.281666666666666,25.787520661157025,76,121,54,97,
+2024-10-22,20.281666666666666,25.787520661157025,76,121,54,97,10265,110,86,Tuesday
requirements.txt
CHANGED
@@ -9,3 +9,4 @@ plotly
 http.client
 datetime
 huggingface-hub
+python-dotenv
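
python-dotenv backs the load_dotenv() call in src/models_loading.py (see that file's hunk below), which pulls the Hugging Face credentials from a local .env file before login and hf_hub_download are used. A minimal sketch of the pattern; the variable name HF_TOKEN and the .env layout are assumptions, not taken from this commit:

    # .env (kept out of version control)
    # HF_TOKEN=hf_xxxxxxxxxxxxxxxx

    import os

    from dotenv import load_dotenv
    from huggingface_hub import login

    load_dotenv()                       # load variables from .env into the environment
    login(token=os.getenv("HF_TOKEN"))  # authenticate before downloading the model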
scalers/feature_scaler_NO2.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:005a752194f98e66653af7e3b3461c788fe9a902fb14e1b526aea7ea07201c48
+size 1487
scalers/feature_scaler_O3.joblib
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:389fb707d241a8df5c7a228e4aa3ca1ebf434a0c551bdbd66f000cf2e5850fb1
+size 1375
src/data_loading.py
CHANGED
@@ -1,89 +1,12 @@
 import numpy as np
 import pandas as pd
+import joblib
 
-
-def create_lag_features_for_single_day(data, random_index, lag_days):
-    lag_features = [
-        column
-        for column in data.columns
-        if column
-        in [
-            "O3",
-            "NO2",
-            "wind_speed",
-            "mean_temp",
-            "global_radiation",
-            "percipitation",
-            "pressure",
-            "minimum_visibility",
-            "humidity",
-        ]
-    ]
-    lagged_data = {}
-    for feature in lag_features:
-        for lag in range(1, lag_days + 1):
-            try:
-                lagged_value = data.loc[random_index - lag, feature]
-                lagged_data[f"{feature}_lag_{lag}"] = lagged_value
-            except IndexError:
-                print(
-                    f"Value not found for feature {feature} lagged by {lag} from day {random_index}"
-                )
-                continue
-
-    # Add together lagged features, non-lagged features and date
-    current_data = data.iloc[random_index].to_dict()
-    current_data.update(lagged_data)
-    return pd.DataFrame([current_data])
-
-
-def create_targets_for_single_day(data, random_index, target_column, days_ahead):
-    targets = {}
-    for day in range(1, days_ahead + 1):
-        future_index = random_index + day
-        try:
-            targets[f"{target_column}_{day}_days_ahead"] = data.loc[
-                future_index, target_column
-            ]
-        except IndexError:
-            print(
-                f"Value not found for particle {target_column} forwarded by {day} day"
-            )
-
-    return pd.DataFrame([targets])
-
-
-def load_data_batch(data, target_particle, lag_days):
-    data["date"] = pd.to_datetime(data["date"])
-
-    # Exclude period with missing O3 data + buffer before and after for targets and lag features
-    start_exclusion = pd.to_datetime("2022-01-01") - pd.Timedelta(days=3)
-    end_exclusion = pd.to_datetime("2022-04-27") + pd.Timedelta(days=lag_days)
-    valid_data = data[
-        ~((data["date"] >= start_exclusion) & (data["date"] <= end_exclusion))
-    ]
-    valid_data = valid_data[
-        lag_days:-3
-    ]  # also exclude first seven and last three days of the dataset
-
-    # Get random day in the valid data
-    random_index = np.random.choice(valid_data.index, 1)[0]
-
-    # Create lag features for the selected day
-    train_data = create_lag_features_for_single_day(data, random_index, lag_days)
-    targets = create_targets_for_single_day(
-        data, random_index, target_particle, days_ahead=3
-    )
-
-    return train_data, targets
-
-
-def create_features_and_targets(
+def create_features(
     data,
     target_particle,  # Added this parameter
     lag_days=7,
     sma_days=7,
-    days_ahead=3,
 ):
     """
     Creates lagged features, SMA features, last year's particle data (NO2 and O3) for specific days,
@@ -199,7 +122,7 @@ def create_features_and_targets(
 
 
     # Initialize scalers
-    feature_scaler =
+    feature_scaler = joblib.load(f"scalers/feature_scaler_{target_particle}.joblib")
 
     # Fit the scalers on the training data
     X_scaled = feature_scaler.fit_transform(x)
@@ -209,4 +132,4 @@
         X_scaled, columns=feature_cols, index=x.index
     )
 
-    return
+    return X_scaled
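
One detail worth flagging in the hunk above: the scaler is now loaded from disk, but it is still applied with fit_transform, which re-fits it on the incoming rows and discards the parameters saved at training time. If the intent is to reuse the training-time scaling, the usual pattern is transform only; a minimal sketch, not part of this commit:

    import joblib

    feature_scaler = joblib.load(f"scalers/feature_scaler_{target_particle}.joblib")
    X_scaled = feature_scaler.transform(x)  # apply the saved scaling without re-fitting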
src/models_loading.py
CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
 import streamlit as st
 from dotenv import load_dotenv
 from huggingface_hub import hf_hub_download, login
-
+from src.data_loading import create_features
 
 def load_model(particle):
     load_dotenv()
@@ -24,14 +24,12 @@ def load_model(particle):
 
 
 @st.cache_resource(ttl=6 * 300)  # Reruns every 6 hours
-def run_model(particle):
+def run_model(particle, data):
+    input_data = create_features(data=data, target_particle=particle)
     model = load_model(particle)
 
-    # Static input values
-    input_data = pd.DataFrame(
-        {"Temperature": [20.0], "Wind Speed": [10.0], "Humidity": [50.0]}
-    )
-
     # Run the model with static input
     prediction = model.predict(input_data)
+    target_scaler = joblib.load(f"scalers/target_scaler_{particle}.joblib")
+    prediction = target_scaler.inverse_transform(prediction)
     return prediction
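
With the new signature, callers hand over the raw dataframe and get back predictions passed through target_scaler.inverse_transform, so they are back on the original measurement scale; test.py below exercises exactly this path. Two things the hunks do not show: the joblib.load call relies on a joblib import elsewhere in the module, and scalers/target_scaler_{particle}.joblib is not among the files added in this commit (only the feature scalers are). As an aside, ttl=6 * 300 is 1800 seconds (30 minutes), not the 6 hours the comment claims. A minimal usage sketch:

    import pandas as pd

    from src.models_loading import run_model

    dataset = pd.read_csv("dataset.csv")
    prediction = run_model("O3", data=dataset)
    print(prediction[0])  # one row of inverse-transformed O3 predictions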
test.ipynb
CHANGED
@@ -45,115 +45,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count":
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       " .dataframe tbody tr th:only-of-type {\n",
-       " vertical-align: middle;\n",
-       " }\n",
-       "\n",
-       " .dataframe tbody tr th {\n",
-       " vertical-align: top;\n",
-       " }\n",
-       "\n",
-       " .dataframe thead th {\n",
-       " text-align: right;\n",
-       " }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       " <thead>\n",
-       " <tr style=\"text-align: right;\">\n",
-       " <th></th>\n",
-       " <th>NO2</th>\n",
-       " <th>O3</th>\n",
-       " <th>wind_speed</th>\n",
-       " <th>mean_temp</th>\n",
-       " <th>global_radiation</th>\n",
-       " <th>percipitation</th>\n",
-       " <th>pressure</th>\n",
-       " <th>minimum_visibility</th>\n",
-       " <th>humidity</th>\n",
-       " <th>weekday_sin</th>\n",
-       " <th>...</th>\n",
-       " <th>O3_last_year_4_days_before</th>\n",
-       " <th>NO2_last_year_4_days_before</th>\n",
-       " <th>O3_last_year_5_days_before</th>\n",
-       " <th>NO2_last_year_5_days_before</th>\n",
-       " <th>O3_last_year_6_days_before</th>\n",
-       " <th>NO2_last_year_6_days_before</th>\n",
-       " <th>O3_last_year_7_days_before</th>\n",
-       " <th>NO2_last_year_7_days_before</th>\n",
-       " <th>O3_last_year_3_days_after</th>\n",
-       " <th>NO2_last_year_3_days_after</th>\n",
-       " </tr>\n",
-       " </thead>\n",
-       " <tbody>\n",
-       " <tr>\n",
-       " <th>0</th>\n",
-       " <td>20.281667</td>\n",
-       " <td>25.787521</td>\n",
-       " <td>76</td>\n",
-       " <td>121</td>\n",
-       " <td>54</td>\n",
-       " <td>97</td>\n",
-       " <td>10266</td>\n",
-       " <td>116</td>\n",
-       " <td>87</td>\n",
-       " <td>0.781831</td>\n",
-       " <td>...</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " <td>0</td>\n",
-       " </tr>\n",
-       " </tbody>\n",
-       "</table>\n",
-       "<p>1 rows × 103 columns</p>\n",
-       "</div>"
-      ],
      "text/plain": [
-       "
-       "
-       "
-       "
-       "
-       "
-       "
-       "
-       "
-       "
-       "0 0 0 \n",
-       "\n",
-       " O3_last_year_6_days_before NO2_last_year_6_days_before \\\n",
-       "0 0 0 \n",
-       "\n",
-       " O3_last_year_7_days_before NO2_last_year_7_days_before \\\n",
-       "0 0 0 \n",
-       "\n",
-       " O3_last_year_3_days_after NO2_last_year_3_days_after \n",
-       "0 0 0 \n",
-       "\n",
-       "[1 rows x 103 columns]"
+       "Index(['NO2', 'O3', 'wind_speed', 'mean_temp', 'global_radiation',\n",
+       " 'percipitation', 'pressure', 'minimum_visibility', 'humidity',\n",
+       " 'weekday_sin',\n",
+       " ...\n",
+       " 'O3_last_year_4_days_before', 'NO2_last_year_4_days_before',\n",
+       " 'O3_last_year_5_days_before', 'NO2_last_year_5_days_before',\n",
+       " 'O3_last_year_6_days_before', 'NO2_last_year_6_days_before',\n",
+       " 'O3_last_year_7_days_before', 'NO2_last_year_7_days_before',\n",
+       " 'O3_last_year_3_days_after', 'NO2_last_year_3_days_after'],\n",
+       " dtype='object', length=103)"
      ]
     },
-    "execution_count":
+    "execution_count": 11,
     "metadata": {},
    "output_type": "execute_result"
    }
   ],
   "source": [
-   "test_data"
+   "test_data.columns"
+  ]
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 8,
+  "metadata": {},
+  "outputs": [],
+  "source": [
+   "from src.models_loading import run_model"
   ]
  },
  {
@@ -162,6 +87,51 @@
   "metadata": {},
   "outputs": [],
   "source": []
+ },
+ {
+  "cell_type": "code",
+  "execution_count": 12,
+  "metadata": {},
+  "outputs": [
+   {
+    "name": "stderr",
+    "output_type": "stream",
+    "text": [
+     "2024-10-22 21:43:37.935 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
+     "2024-10-22 21:43:37.938 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
+     "2024-10-22 21:43:37.939 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
+     "2024-10-22 21:43:37.980 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
+     "2024-10-22 21:43:37.980 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n"
+    ]
+   },
+   {
+    "name": "stdout",
+    "output_type": "stream",
+    "text": [
+     "Number of rows with missing values dropped: 7\n"
+    ]
+   },
+   {
+    "ename": "FileNotFoundError",
+    "evalue": "[Errno 2] No such file or directory: '../scalers/feature_scaler_O3.joblib'",
+    "output_type": "error",
+    "traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m prediction \u001b[38;5;241m=\u001b[39m \u001b[43mrun_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:210\u001b[0m, in \u001b[0;36mCachedFunc.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mshow_spinner \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mshow_spinner, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m spinner(message, _cache\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_or_create_cached_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_or_create_cached_value(args, kwargs)\n",
"File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:235\u001b[0m, in \u001b[0;36mCachedFunc._get_or_create_cached_value\u001b[0;34m(self, func_args, func_kwargs)\u001b[0m\n\u001b[1;32m 233\u001b[0m cached_result \u001b[38;5;241m=\u001b[39m cache\u001b[38;5;241m.\u001b[39mread_result(value_key)\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handle_cache_hit(cached_result)\n\u001b[0;32m--> 235\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_handle_cache_miss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcache\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue_key\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
"File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:292\u001b[0m, in \u001b[0;36mCachedFunc._handle_cache_miss\u001b[0;34m(self, cache, value_key, func_args, func_kwargs)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[38;5;66;03m# We acquired the lock before any other thread. Compute the value!\u001b[39;00m\n\u001b[1;32m 289\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mcached_message_replay_ctx\u001b[38;5;241m.\u001b[39mcalling_cached_function(\n\u001b[1;32m 290\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mfunc\n\u001b[1;32m 291\u001b[0m ):\n\u001b[0;32m--> 292\u001b[0m computed_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_info\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunc_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunc_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;66;03m# We've computed our value, and now we need to write it back to the cache\u001b[39;00m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;66;03m# along with any \"replay messages\" that were generated during value computation.\u001b[39;00m\n\u001b[1;32m 296\u001b[0m messages \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mcached_message_replay_ctx\u001b[38;5;241m.\u001b[39m_most_recent_messages\n",
"File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/src/models_loading.py:28\u001b[0m, in \u001b[0;36mrun_model\u001b[0;34m(particle, data)\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;129m@st\u001b[39m\u001b[38;5;241m.\u001b[39mcache_resource(ttl\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m6\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m300\u001b[39m) \u001b[38;5;66;03m# Reruns every 6 hours\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_model\u001b[39m(particle, data):\n\u001b[0;32m---> 28\u001b[0m input_data \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparticle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 29\u001b[0m model \u001b[38;5;241m=\u001b[39m load_model(particle)\n\u001b[1;32m 31\u001b[0m \u001b[38;5;66;03m# Run the model with static input\u001b[39;00m\n",
"File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/src/data_loading.py:125\u001b[0m, in \u001b[0;36mcreate_features\u001b[0;34m(data, target_particle, lag_days, sma_days)\u001b[0m\n\u001b[1;32m 121\u001b[0m x \u001b[38;5;241m=\u001b[39m data[feature_cols]\n\u001b[1;32m 124\u001b[0m \u001b[38;5;66;03m# Initialize scalers\u001b[39;00m\n\u001b[0;32m--> 125\u001b[0m feature_scaler \u001b[38;5;241m=\u001b[39m \u001b[43mjoblib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m../scalers/feature_scaler_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.joblib\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;66;03m# Fit the scalers on the training data\u001b[39;00m\n\u001b[1;32m 128\u001b[0m X_scaled \u001b[38;5;241m=\u001b[39m feature_scaler\u001b[38;5;241m.\u001b[39mfit_transform(x)\n",
"File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/joblib/numpy_pickle.py:650\u001b[0m, in \u001b[0;36mload\u001b[0;34m(filename, mmap_mode)\u001b[0m\n\u001b[1;32m 648\u001b[0m obj \u001b[38;5;241m=\u001b[39m _unpickle(fobj)\n\u001b[1;32m 649\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 650\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 651\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _read_fileobject(f, filename, mmap_mode) \u001b[38;5;28;01mas\u001b[39;00m fobj:\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(fobj, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 653\u001b[0m \u001b[38;5;66;03m# if the returned file object is a string, this means we\u001b[39;00m\n\u001b[1;32m 654\u001b[0m \u001b[38;5;66;03m# try to load a pickle file generated with an version of\u001b[39;00m\n\u001b[1;32m 655\u001b[0m \u001b[38;5;66;03m# Joblib so we load it with joblib compatibility function.\u001b[39;00m\n",
"\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../scalers/feature_scaler_O3.joblib'"
+    ]
+   }
+  ],
+  "source": [
+   "prediction = run_model(\"O3\", data=dataset)"
+  ]
  }
 ],
 "metadata": {
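
The FileNotFoundError in the last cell comes from the relative scaler path ('../scalers/feature_scaler_O3.joblib' in the traceback, 'scalers/...' in the committed code), which only resolves when the process starts from the matching working directory. One way to make the lookup independent of the working directory, offered purely as a sketch and not part of this commit, is to anchor the path to the source file:

    from pathlib import Path

    import joblib

    # assumes scalers/ sits next to src/ at the repository root, as in the file list above
    SCALER_DIR = Path(__file__).resolve().parent.parent / "scalers"

    def load_feature_scaler(target_particle):
        return joblib.load(SCALER_DIR / f"feature_scaler_{target_particle}.joblib")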
test.py
CHANGED
@@ -1,13 +1,7 @@
-from data_loading import create_features_and_targets
-from data_api_calls import get_data
 import pandas as pd
 
+from src.models_loading import run_model
 dataset = pd.read_csv("dataset.csv")
-
-
-
-    target_particle="NO2",
-    lag_days=7,
-    sma_days=7,
-    days_ahead=3,
-)
+prediction = run_model("O3", data=dataset)
+print(type(prediction))
+print(prediction)