Mihkelmj committed
Commit 5064f83
1 Parent(s): 9aa7aec

app.py gets the data and runs the model; last year features to be implemented
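For orientation, the end-to-end flow this commit wires into app.py looks roughly like the sketch below (a minimal reconstruction from the hunks that follow; the (1, 3) prediction shape is inferred from the pred1/pred2/pred3 indexing, not stated anywhere):

import pandas as pd

from data_api_calls import get_data
from src.models_loading import run_model

get_data()                            # refresh dataset.csv from the data APIs
dataset = pd.read_csv("dataset.csv")  # past pollution and weather observations
prediction = run_model("O3", data=dataset)
pred1, pred2, pred3 = prediction[0]   # 1-, 2- and 3-day-ahead O3 forecasts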

__pycache__/data_api_calls.cpython-312.pyc CHANGED
Binary files a/__pycache__/data_api_calls.cpython-312.pyc and b/__pycache__/data_api_calls.cpython-312.pyc differ
 
app.py CHANGED
@@ -2,9 +2,10 @@ import altair as alt
 import pandas as pd
 import plotly.graph_objects as go
 import streamlit as st
+
+from data_api_calls import get_data
 from src.helper_functions import custom_metric_box, pollution_box
 from src.models_loading import run_model
-from data_api_calls import get_data
 
 st.set_page_config(
     page_title="Utrecht Pollution Dashboard",
@@ -15,10 +16,12 @@ st.set_page_config(
 
 alt.themes.enable("dark")
 
-test_predictions = run_model("O3")
 get_data()
-
-data = pd.read_csv("dataset.csv")
+dataset = pd.read_csv("dataset.csv")
+prediction = run_model("O3", data=dataset)
+pred1 = prediction[0][0]
+pred2 = prediction[0][1]
+pred3 = prediction[0][2]
 
 # App Title
 st.title("Utrecht Pollution Dashboard🌱")
@@ -54,23 +57,24 @@ with col1:
     pollution_box(label="NO<sub>2</sub>", value="28 µg/m³", delta="+3 µg/m³")
 
     # Sample data (replace with your actual data)
-    dates_past = pd.date_range(end=pd.Timestamp.today(), periods=7).to_list()
-    dates_future = pd.date_range(
-        start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3
-    ).to_list()
+    # Sample data (replace with your actual data)
+    dates_past = pd.date_range(end=pd.Timestamp.today(), periods=8).to_list()
+    dates_future = pd.date_range(start=pd.Timestamp.today() + pd.Timedelta(days=1), periods=3).to_list()
 
     # O3 and NO2 values for the past 7 days
-    o3_past_values = [30, 32, 34, 33, 31, 35, 36]
-    no2_past_values = [20, 22, 21, 23, 22, 24, 25]
-
-    # Predicted O3 and NO2 values for the next 3 days
-    o3_future_values = [37, 38, 40]
-    no2_future_values = [26, 27, 28]
+    o3_past_values = dataset["O3"]
+    no2_past_values = dataset["NO2"]
+
+    # Predicted O3 and NO2 values for the next 3 days (convert to pandas Series)
+    o3_future_values = pd.Series(prediction[0].flatten())  # Flatten the array to 1D
+    no2_future_values = pd.Series([26, 27, 28])  # Example prediction data
 
+    # Combine the past and future values using pd.concat
+    o3_values = pd.concat([o3_past_values, o3_future_values], ignore_index=True)
+    no2_values = pd.concat([no2_past_values, no2_future_values], ignore_index=True)
+
     # Combine dates and values
     dates = dates_past + dates_future
-    o3_values = o3_past_values + o3_future_values
-    no2_values = no2_past_values + no2_future_values
 
     # Create a DataFrame
     df = pd.DataFrame({"Date": dates, "O3": o3_values, "NO2": no2_values})
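One constraint the new plotting code leans on when the sample values are swapped for real data: pd.DataFrame requires every column to have the same length, so the 8 past dates plus 3 future dates must line up with the concatenated value series. An illustrative guard (not part of the commit):

# Illustrative sanity check: the DataFrame constructor raises ValueError on a mismatch.
assert len(dates) == len(o3_values) == len(no2_values), (
    f"dates={len(dates)}, O3={len(o3_values)}, NO2={len(no2_values)}"
)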
dataset.csv CHANGED
@@ -6,4 +6,4 @@ date,NO2,O3,wind_speed,mean_temp,global_radiation,percipitation,pressure,minimum
 2024-10-19,24.727853658536585,23.52574561403509,43,147,43,28,10140,236,92,Saturday
 2024-10-20,22.700366666666664,24.317572254335257,68,145,0,0,10160,241,82,Sunday
 2024-10-21,19.763439153439155,25.661659574468086,66,142,27,39,10201,110,90,Monday
-2024-10-22,20.281666666666666,25.787520661157025,76,121,54,97,10266,116,87,Tuesday
+2024-10-22,20.281666666666666,25.787520661157025,76,121,54,97,10265,110,86,Tuesday
requirements.txt CHANGED
@@ -9,3 +9,4 @@ plotly
9
  http.client
10
  datetime
11
  huggingface-hub
 
 
9
  http.client
10
  datetime
11
  huggingface-hub
12
+ python-dotenv
scalers/feature_scaler_NO2.joblib ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:005a752194f98e66653af7e3b3461c788fe9a902fb14e1b526aea7ea07201c48
+size 1487
scalers/feature_scaler_O3.joblib ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:389fb707d241a8df5c7a228e4aa3ca1ebf434a0c551bdbd66f000cf2e5850fb1
+size 1375
src/data_loading.py CHANGED
@@ -1,89 +1,12 @@
 import numpy as np
 import pandas as pd
+import joblib
 
-
-def create_lag_features_for_single_day(data, random_index, lag_days):
-    lag_features = [
-        column
-        for column in data.columns
-        if column
-        in [
-            "O3",
-            "NO2",
-            "wind_speed",
-            "mean_temp",
-            "global_radiation",
-            "percipitation",
-            "pressure",
-            "minimum_visibility",
-            "humidity",
-        ]
-    ]
-    lagged_data = {}
-    for feature in lag_features:
-        for lag in range(1, lag_days + 1):
-            try:
-                lagged_value = data.loc[random_index - lag, feature]
-                lagged_data[f"{feature}_lag_{lag}"] = lagged_value
-            except IndexError:
-                print(
-                    f"Value not found for feature {feature} lagged by {lag} from day {random_index}"
-                )
-                continue
-
-    # Add together lagged features, non-lagged features and date
-    current_data = data.iloc[random_index].to_dict()
-    current_data.update(lagged_data)
-    return pd.DataFrame([current_data])
-
-
-def create_targets_for_single_day(data, random_index, target_column, days_ahead):
-    targets = {}
-    for day in range(1, days_ahead + 1):
-        future_index = random_index + day
-        try:
-            targets[f"{target_column}_{day}_days_ahead"] = data.loc[
-                future_index, target_column
-            ]
-        except IndexError:
-            print(
-                f"Value not found for particle {target_column} forwarded by {day} day"
-            )
-
-    return pd.DataFrame([targets])
-
-
-def load_data_batch(data, target_particle, lag_days):
-    data["date"] = pd.to_datetime(data["date"])
-
-    # Exclude period with missing O3 data + buffer before and after for targets and lag features
-    start_exclusion = pd.to_datetime("2022-01-01") - pd.Timedelta(days=3)
-    end_exclusion = pd.to_datetime("2022-04-27") + pd.Timedelta(days=lag_days)
-    valid_data = data[
-        ~((data["date"] >= start_exclusion) & (data["date"] <= end_exclusion))
-    ]
-    valid_data = valid_data[
-        lag_days:-3
-    ]  # also exclude first seven and last three days of the dataset
-
-    # Get random day in the valid data
-    random_index = np.random.choice(valid_data.index, 1)[0]
-
-    # Create lag features for the selected day
-    train_data = create_lag_features_for_single_day(data, random_index, lag_days)
-    targets = create_targets_for_single_day(
-        data, random_index, target_particle, days_ahead=3
-    )
-
-    return train_data, targets
-
-
-def create_features_and_targets(
+def create_features(
     data,
     target_particle,  # Added this parameter
     lag_days=7,
     sma_days=7,
-    days_ahead=3,
 ):
     """
     Creates lagged features, SMA features, last year's particle data (NO2 and O3) for specific days,
@@ -199,7 +122,7 @@ def create_features_and_targets(
 
 
     # Initialize scalers
-    feature_scaler = StandardScaler()
+    feature_scaler = joblib.load(f"scalers/feature_scaler_{target_particle}.joblib")
 
     # Fit the scalers on the training data
     X_scaled = feature_scaler.fit_transform(x)
@@ -209,4 +132,4 @@ def create_features_and_targets(
         X_scaled, columns=feature_cols, index=x.index
     )
 
-    return x
+    return X_scaled
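Note that the new hunk loads a pre-fitted scaler yet still calls fit_transform, which refits the scaler on the single inference row and discards the loaded statistics. If the committed scalers/*.joblib files hold training-time parameters, inference would normally apply transform only; a minimal sketch of that pattern (assuming a scikit-learn StandardScaler saved with joblib, with x being the feature frame built earlier in create_features):

import joblib
import pandas as pd

# Apply the training-time scaling without refitting on the inference row.
feature_scaler = joblib.load(f"scalers/feature_scaler_{target_particle}.joblib")
X_scaled = pd.DataFrame(
    feature_scaler.transform(x),  # transform only; no fit at inference time
    columns=x.columns,
    index=x.index,
)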
src/models_loading.py CHANGED
@@ -5,7 +5,7 @@ import pandas as pd
 import streamlit as st
 from dotenv import load_dotenv
 from huggingface_hub import hf_hub_download, login
-
+from src.data_loading import create_features
 
 def load_model(particle):
     load_dotenv()
@@ -24,14 +24,12 @@ def load_model(particle):
 
 
 @st.cache_resource(ttl=6 * 300)  # Reruns every 6 hours
-def run_model(particle):
+def run_model(particle, data):
+    input_data = create_features(data=data, target_particle=particle)
     model = load_model(particle)
 
-    # Static input values
-    input_data = pd.DataFrame(
-        {"Temperature": [20.0], "Wind Speed": [10.0], "Humidity": [50.0]}
-    )
-
     # Run the model with static input
     prediction = model.predict(input_data)
+    target_scaler = joblib.load(f"scalers/target_scaler_{particle}.joblib")
+    prediction = target_scaler.inverse_transform(prediction)
     return prediction
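Two things worth flagging alongside a usage sketch of the reworked run_model: the hunks above call joblib.load without showing an import joblib being added to this module, and ttl=6 * 300 is 1,800 seconds (30 minutes), whereas the six hours the comment intends would be 6 * 3600. Assuming the import is present and a matching target_scaler_O3.joblib was committed next to the feature scalers:

import pandas as pd

from src.models_loading import run_model

# run_model now builds features from the raw observations, predicts, and
# inverse-transforms the output back to the original µg/m³ scale.
dataset = pd.read_csv("dataset.csv")
prediction = run_model("O3", data=dataset)  # assumed shape: (1, 3)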
test.ipynb CHANGED
@@ -45,115 +45,40 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": 11,
    "metadata": {},
    "outputs": [
     {
      "data": {
-      "text/html": [
-       "<div>\n",
-       "<style scoped>\n",
-       "    .dataframe tbody tr th:only-of-type {\n",
-       "        vertical-align: middle;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe tbody tr th {\n",
-       "        vertical-align: top;\n",
-       "    }\n",
-       "\n",
-       "    .dataframe thead th {\n",
-       "        text-align: right;\n",
-       "    }\n",
-       "</style>\n",
-       "<table border=\"1\" class=\"dataframe\">\n",
-       "  <thead>\n",
-       "    <tr style=\"text-align: right;\">\n",
-       "      <th></th>\n",
-       "      <th>NO2</th>\n",
-       "      <th>O3</th>\n",
-       "      <th>wind_speed</th>\n",
-       "      <th>mean_temp</th>\n",
-       "      <th>global_radiation</th>\n",
-       "      <th>percipitation</th>\n",
-       "      <th>pressure</th>\n",
-       "      <th>minimum_visibility</th>\n",
-       "      <th>humidity</th>\n",
-       "      <th>weekday_sin</th>\n",
-       "      <th>...</th>\n",
-       "      <th>O3_last_year_4_days_before</th>\n",
-       "      <th>NO2_last_year_4_days_before</th>\n",
-       "      <th>O3_last_year_5_days_before</th>\n",
-       "      <th>NO2_last_year_5_days_before</th>\n",
-       "      <th>O3_last_year_6_days_before</th>\n",
-       "      <th>NO2_last_year_6_days_before</th>\n",
-       "      <th>O3_last_year_7_days_before</th>\n",
-       "      <th>NO2_last_year_7_days_before</th>\n",
-       "      <th>O3_last_year_3_days_after</th>\n",
-       "      <th>NO2_last_year_3_days_after</th>\n",
-       "    </tr>\n",
-       "  </thead>\n",
-       "  <tbody>\n",
-       "    <tr>\n",
-       "      <th>0</th>\n",
-       "      <td>20.281667</td>\n",
-       "      <td>25.787521</td>\n",
-       "      <td>76</td>\n",
-       "      <td>121</td>\n",
-       "      <td>54</td>\n",
-       "      <td>97</td>\n",
-       "      <td>10266</td>\n",
-       "      <td>116</td>\n",
-       "      <td>87</td>\n",
-       "      <td>0.781831</td>\n",
-       "      <td>...</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "      <td>0</td>\n",
-       "    </tr>\n",
-       "  </tbody>\n",
-       "</table>\n",
-       "<p>1 rows × 103 columns</p>\n",
-       "</div>"
-      ],
       "text/plain": [
-       "         NO2         O3  wind_speed  mean_temp  global_radiation  \\\n",
-       "0  20.281667  25.787521          76        121                54   \n",
-       "\n",
-       "   percipitation  pressure  minimum_visibility  humidity  weekday_sin  ...  \\\n",
-       "0             97     10266                 116        87     0.781831  ...   \n",
-       "\n",
-       "   O3_last_year_4_days_before  NO2_last_year_4_days_before  \\\n",
-       "0                            0                            0   \n",
-       "\n",
-       "   O3_last_year_5_days_before  NO2_last_year_5_days_before  \\\n",
-       "0                            0                            0   \n",
-       "\n",
-       "   O3_last_year_6_days_before  NO2_last_year_6_days_before  \\\n",
-       "0                            0                            0   \n",
-       "\n",
-       "   O3_last_year_7_days_before  NO2_last_year_7_days_before  \\\n",
-       "0                            0                            0   \n",
-       "\n",
-       "   O3_last_year_3_days_after  NO2_last_year_3_days_after  \n",
-       "0                           0                           0  \n",
-       "\n",
-       "[1 rows x 103 columns]"
+       "Index(['NO2', 'O3', 'wind_speed', 'mean_temp', 'global_radiation',\n",
+       "       'percipitation', 'pressure', 'minimum_visibility', 'humidity',\n",
+       "       'weekday_sin',\n",
+       "       ...\n",
+       "       'O3_last_year_4_days_before', 'NO2_last_year_4_days_before',\n",
+       "       'O3_last_year_5_days_before', 'NO2_last_year_5_days_before',\n",
+       "       'O3_last_year_6_days_before', 'NO2_last_year_6_days_before',\n",
+       "       'O3_last_year_7_days_before', 'NO2_last_year_7_days_before',\n",
+       "       'O3_last_year_3_days_after', 'NO2_last_year_3_days_after'],\n",
+       "      dtype='object', length=103)"
       ]
      },
-     "execution_count": 5,
+     "execution_count": 11,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "test_data"
+    "test_data.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from src.models_loading import run_model"
    ]
   },
   {
@@ -162,6 +87,51 @@
    "metadata": {},
    "outputs": [],
    "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stderr",
+     "output_type": "stream",
+     "text": [
+      "2024-10-22 21:43:37.935 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
+      "2024-10-22 21:43:37.938 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
+      "2024-10-22 21:43:37.939 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
+      "2024-10-22 21:43:37.980 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n",
+      "2024-10-22 21:43:37.980 Thread 'MainThread': missing ScriptRunContext! This warning can be ignored when running in bare mode.\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Number of rows with missing values dropped: 7\n"
+     ]
+    },
+    {
+     "ename": "FileNotFoundError",
+     "evalue": "[Errno 2] No such file or directory: '../scalers/feature_scaler_O3.joblib'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mFileNotFoundError\u001b[0m                         Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[12], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m prediction \u001b[38;5;241m=\u001b[39m \u001b[43mrun_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:210\u001b[0m, in \u001b[0;36mCachedFunc.__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mshow_spinner \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mshow_spinner, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 209\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m spinner(message, _cache\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m):\n\u001b[0;32m--> 210\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_get_or_create_cached_value\u001b[49m\u001b[43m(\u001b[49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 211\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 212\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_get_or_create_cached_value(args, kwargs)\n",
+      "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:235\u001b[0m, in \u001b[0;36mCachedFunc._get_or_create_cached_value\u001b[0;34m(self, func_args, func_kwargs)\u001b[0m\n\u001b[1;32m 233\u001b[0m cached_result \u001b[38;5;241m=\u001b[39m cache\u001b[38;5;241m.\u001b[39mread_result(value_key)\n\u001b[1;32m 234\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_handle_cache_hit(cached_result)\n\u001b[0;32m--> 235\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_handle_cache_miss\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcache\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue_key\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mfunc_kwargs\u001b[49m\u001b[43m)\u001b[49m\n",
      "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/streamlit/runtime/caching/cache_utils.py:292\u001b[0m, in \u001b[0;36mCachedFunc._handle_cache_miss\u001b[0;34m(self, cache, value_key, func_args, func_kwargs)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[38;5;66;03m# We acquired the lock before any other thread. Compute the value!\u001b[39;00m\n\u001b[1;32m 289\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mcached_message_replay_ctx\u001b[38;5;241m.\u001b[39mcalling_cached_function(\n\u001b[1;32m 290\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mfunc\n\u001b[1;32m 291\u001b[0m ):\n\u001b[0;32m--> 292\u001b[0m computed_value \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_info\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunc_args\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfunc_kwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 294\u001b[0m \u001b[38;5;66;03m# We've computed our value, and now we need to write it back to the cache\u001b[39;00m\n\u001b[1;32m 295\u001b[0m \u001b[38;5;66;03m# along with any \"replay messages\" that were generated during value computation.\u001b[39;00m\n\u001b[1;32m 296\u001b[0m messages \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_info\u001b[38;5;241m.\u001b[39mcached_message_replay_ctx\u001b[38;5;241m.\u001b[39m_most_recent_messages\n",
+      "File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/src/models_loading.py:28\u001b[0m, in \u001b[0;36mrun_model\u001b[0;34m(particle, data)\u001b[0m\n\u001b[1;32m 26\u001b[0m \u001b[38;5;129m@st\u001b[39m\u001b[38;5;241m.\u001b[39mcache_resource(ttl\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m6\u001b[39m \u001b[38;5;241m*\u001b[39m \u001b[38;5;241m300\u001b[39m) \u001b[38;5;66;03m# Reruns every 6 hours\u001b[39;00m\n\u001b[1;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_model\u001b[39m(particle, data):\n\u001b[0;32m---> 28\u001b[0m input_data \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparticle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 29\u001b[0m model \u001b[38;5;241m=\u001b[39m load_model(particle)\n\u001b[1;32m 31\u001b[0m \u001b[38;5;66;03m# Run the model with static input\u001b[39;00m\n",
+      "File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/src/data_loading.py:125\u001b[0m, in \u001b[0;36mcreate_features\u001b[0;34m(data, target_particle, lag_days, sma_days)\u001b[0m\n\u001b[1;32m 121\u001b[0m x \u001b[38;5;241m=\u001b[39m data[feature_cols]\n\u001b[1;32m 124\u001b[0m \u001b[38;5;66;03m# Initialize scalers\u001b[39;00m\n\u001b[0;32m--> 125\u001b[0m feature_scaler \u001b[38;5;241m=\u001b[39m \u001b[43mjoblib\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mload\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mf\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m../scalers/feature_scaler_\u001b[39;49m\u001b[38;5;132;43;01m{\u001b[39;49;00m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;132;43;01m}\u001b[39;49;00m\u001b[38;5;124;43m.joblib\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[1;32m 127\u001b[0m \u001b[38;5;66;03m# Fit the scalers on the training data\u001b[39;00m\n\u001b[1;32m 128\u001b[0m X_scaled \u001b[38;5;241m=\u001b[39m feature_scaler\u001b[38;5;241m.\u001b[39mfit_transform(x)\n",
+      "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/joblib/numpy_pickle.py:650\u001b[0m, in \u001b[0;36mload\u001b[0;34m(filename, mmap_mode)\u001b[0m\n\u001b[1;32m 648\u001b[0m obj \u001b[38;5;241m=\u001b[39m _unpickle(fobj)\n\u001b[1;32m 649\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 650\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m \u001b[38;5;28;43mopen\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfilename\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mrb\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m)\u001b[49m \u001b[38;5;28;01mas\u001b[39;00m f:\n\u001b[1;32m 651\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m _read_fileobject(f, filename, mmap_mode) \u001b[38;5;28;01mas\u001b[39;00m fobj:\n\u001b[1;32m 652\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(fobj, \u001b[38;5;28mstr\u001b[39m):\n\u001b[1;32m 653\u001b[0m \u001b[38;5;66;03m# if the returned file object is a string, this means we\u001b[39;00m\n\u001b[1;32m 654\u001b[0m \u001b[38;5;66;03m# try to load a pickle file generated with an version of\u001b[39;00m\n\u001b[1;32m 655\u001b[0m \u001b[38;5;66;03m# Joblib so we load it with joblib compatibility function.\u001b[39;00m\n",
+      "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: '../scalers/feature_scaler_O3.joblib'"
+     ]
+    }
+   ],
+   "source": [
+    "prediction = run_model(\"O3\", data=dataset)"
+   ]
   }
  ],
 "metadata": {
test.py CHANGED
@@ -1,13 +1,7 @@
-from data_loading import create_features_and_targets
-from data_api_calls import get_data
 import pandas as pd
 
+from src.models_loading import run_model
 dataset = pd.read_csv("dataset.csv")
-
-X, y = create_features_and_targets(
-    data=dataset,
-    target_particle="NO2",
-    lag_days=7,
-    sma_days=7,
-    days_ahead=3,
-)
+prediction = run_model("O3", data=dataset)
+print(type(prediction))
+print(prediction)