Mihkelmj committed on
Commit
9aa7aec
2 Parent(s): 472271b 2c18c58

Merge branch 'mmj'

Browse files

"asdasd#"

t
exit

__pycache__/data_api_calls.cpython-312.pyc CHANGED
Binary files a/__pycache__/data_api_calls.cpython-312.pyc and b/__pycache__/data_api_calls.cpython-312.pyc differ
 
__pycache__/data_loading.cpython-312.pyc CHANGED
Binary files a/__pycache__/data_loading.cpython-312.pyc and b/__pycache__/data_loading.cpython-312.pyc differ
 
data_api_calls.py CHANGED
@@ -1,13 +1,15 @@
 
 
1
  import http.client
2
- from datetime import date, timedelta
3
- import pandas as pd
4
- from io import StringIO
5
  import os
6
  import re
7
- import csv
8
- import urllib.request
9
  import sys
10
- import codecs
 
 
 
 
 
11
 
12
  def pollution_data():
13
  particles = ["NO2", "O3"]
@@ -15,7 +17,7 @@ def pollution_data():
15
  all_dataframes = []
16
  today = date.today().isoformat() + "T09:00:00Z"
17
  yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
18
- latest_date = (date.today() - timedelta(7)).isoformat() + "T09:00:00Z"
19
  days_today = 0
20
  days_yesterday = 1
21
  while(today != latest_date):
@@ -50,7 +52,7 @@ def clean_values():
50
  O3 = []
51
  today = date.today().isoformat() + "T09:00:00Z"
52
  yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
53
- latest_date = (date.today() - timedelta(7)).isoformat() + "T09:00:00Z"
54
  days_today = 0
55
  while(today != latest_date):
56
  for particle in particles:
@@ -140,10 +142,10 @@ def scale():
140
  df['humidity'] = df['humidity'].astype(int)
141
  df['global_radiation'] = df['global_radiation'].astype(int)
142
 
143
- df.to_csv('recorded_data.csv', index=False)
144
 
145
  def insert_pollution(NO2, O3):
146
- file_path = 'recorded_data.csv'
147
  df = pd.read_csv(file_path)
148
  start_index = 0
149
  while NO2:
@@ -157,7 +159,7 @@ def insert_pollution(NO2, O3):
157
 
158
  def weather_data():
159
  today = date.today().isoformat()
160
- seven_days = (date.today() - timedelta(6)).isoformat()
161
  try:
162
  ResultBytes = urllib.request.urlopen(f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Utrecht/{seven_days}/{today}?unitGroup=metric&elements=datetime%2Cwindspeed%2Ctemp%2Csolarradiation%2Cprecip%2Cpressure%2Cvisibility%2Chumidity&include=days&key=7Y6AY56M6RWVNHQ3SAVHNJWFS&maxStations=1&contentType=csv")
163
 
 
1
+ import codecs
2
+ import csv
3
  import http.client
 
 
 
4
  import os
5
  import re
 
 
6
  import sys
7
+ import urllib.request
8
+ from datetime import date, timedelta
9
+ from io import StringIO
10
+
11
+ import pandas as pd
12
+
13
 
14
  def pollution_data():
15
  particles = ["NO2", "O3"]
 
17
  all_dataframes = []
18
  today = date.today().isoformat() + "T09:00:00Z"
19
  yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
20
+ latest_date = (date.today() - timedelta(8)).isoformat() + "T09:00:00Z"
21
  days_today = 0
22
  days_yesterday = 1
23
  while(today != latest_date):
 
52
  O3 = []
53
  today = date.today().isoformat() + "T09:00:00Z"
54
  yesterday = (date.today() - timedelta(1)).isoformat() + "T09:00:00Z"
55
+ latest_date = (date.today() - timedelta(8)).isoformat() + "T09:00:00Z"
56
  days_today = 0
57
  while(today != latest_date):
58
  for particle in particles:
 
142
  df['humidity'] = df['humidity'].astype(int)
143
  df['global_radiation'] = df['global_radiation'].astype(int)
144
 
145
+ df.to_csv('dataset.csv', index=False)
146
 
147
  def insert_pollution(NO2, O3):
148
+ file_path = 'dataset.csv'
149
  df = pd.read_csv(file_path)
150
  start_index = 0
151
  while NO2:
 
159
 
160
  def weather_data():
161
  today = date.today().isoformat()
162
+ seven_days = (date.today() - timedelta(7)).isoformat()
163
  try:
164
  ResultBytes = urllib.request.urlopen(f"https://weather.visualcrossing.com/VisualCrossingWebServices/rest/services/timeline/Utrecht/{seven_days}/{today}?unitGroup=metric&elements=datetime%2Cwindspeed%2Ctemp%2Csolarradiation%2Cprecip%2Cpressure%2Cvisibility%2Chumidity&include=days&key=7Y6AY56M6RWVNHQ3SAVHNJWFS&maxStations=1&contentType=csv")
165
 
dataset.csv CHANGED
@@ -1,8 +1,9 @@
1
  date,NO2,O3,wind_speed,mean_temp,global_radiation,percipitation,pressure,minimum_visibility,humidity,weekday
 
2
  2024-10-16,22.4144459833795,22.78109803921569,61,151,40,0,10103,358,82,Wednesday
3
  2024-10-17,22.990465489566613,22.928154311649017,51,169,43,6,10100,371,86,Thursday
4
  2024-10-18,23.659013539651834,23.700536672629696,21,156,42,39,10140,64,97,Friday
5
  2024-10-19,24.727853658536585,23.52574561403509,43,147,43,28,10140,236,92,Saturday
6
  2024-10-20,22.700366666666664,24.317572254335257,68,145,0,0,10160,241,82,Sunday
7
  2024-10-21,19.763439153439155,25.661659574468086,66,142,27,39,10201,110,90,Monday
8
- 2024-10-22,20.281666666666666,25.787520661157025,76,120,54,97,10266,128,87,Tuesday
 
1
  date,NO2,O3,wind_speed,mean_temp,global_radiation,percipitation,pressure,minimum_visibility,humidity,weekday
2
+ 2024-10-15,22.853627569528417,22.52299076212471,51,87,71,0,10194,290,86,Tuesday
3
  2024-10-16,22.4144459833795,22.78109803921569,61,151,40,0,10103,358,82,Wednesday
4
  2024-10-17,22.990465489566613,22.928154311649017,51,169,43,6,10100,371,86,Thursday
5
  2024-10-18,23.659013539651834,23.700536672629696,21,156,42,39,10140,64,97,Friday
6
  2024-10-19,24.727853658536585,23.52574561403509,43,147,43,28,10140,236,92,Saturday
7
  2024-10-20,22.700366666666664,24.317572254335257,68,145,0,0,10160,241,82,Sunday
8
  2024-10-21,19.763439153439155,25.661659574468086,66,142,27,39,10201,110,90,Monday
9
+ 2024-10-22,20.281666666666666,25.787520661157025,76,121,54,97,10266,116,87,Tuesday
src/data_loading.py CHANGED
@@ -162,17 +162,17 @@ def create_features_and_targets(
162
 
163
  # Create particle data (NO2 and O3) from the same time last year
164
  # Today last year
165
- data["O3_last_year"] = data["O3"].shift(365)
166
- data["NO2_last_year"] = data["NO2"].shift(365)
167
 
168
  # 7 days before today last year
169
  for i in range(1, lag_days + 1):
170
- data[f"O3_last_year_{i}_days_before"] = data["O3"].shift(365 + i)
171
- data[f"NO2_last_year_{i}_days_before"] = data["NO2"].shift(365 + i)
172
 
173
  # 3 days after today last year
174
- data["O3_last_year_3_days_after"] = data["O3"].shift(365 - 3)
175
- data["NO2_last_year_3_days_after"] = data["NO2"].shift(365 - 3)
176
 
177
  # Calculate the number of rows before dropping missing values
178
  rows_before = data.shape[0]
@@ -209,4 +209,4 @@ def create_features_and_targets(
209
  X_scaled, columns=feature_cols, index=x.index
210
  )
211
 
212
- return X_scaled
 
162
 
163
  # Create particle data (NO2 and O3) from the same time last year
164
  # Today last year
165
+ data["O3_last_year"] = 0 # data["O3_last_year"] = data["O3"].shift(365)
166
+ data["NO2_last_year"] = 0 # data["NO2_last_year"] = data["NO2"].shift(365)
167
 
168
  # 7 days before today last year
169
  for i in range(1, lag_days + 1):
170
+ data[f"O3_last_year_{i}_days_before"] = 0 # data["O3"].shift(365 + i)
171
+ data[f"NO2_last_year_{i}_days_before"] = 0 # data["NO2"].shift(365 + i)
172
 
173
  # 3 days after today last year
174
+ data["O3_last_year_3_days_after"] = 0 # data["O3"].shift(365 - 3)
175
+ data["NO2_last_year_3_days_after"] = 0 # data["NO2"].shift(365 - 3)
176
 
177
  # Calculate the number of rows before dropping missing values
178
  rows_before = data.shape[0]
 
209
  X_scaled, columns=feature_cols, index=x.index
210
  )
211
 
212
+ return x
test.ipynb CHANGED
@@ -13,7 +13,7 @@
13
  },
14
  {
15
  "cell_type": "code",
16
- "execution_count": 8,
17
  "metadata": {},
18
  "outputs": [],
19
  "source": [
@@ -22,7 +22,7 @@
22
  },
23
  {
24
  "cell_type": "code",
25
- "execution_count": 11,
26
  "metadata": {},
27
  "outputs": [
28
  {
@@ -31,25 +31,6 @@
31
  "text": [
32
  "Number of rows with missing values dropped: 7\n"
33
  ]
34
- },
35
- {
36
- "ename": "ValueError",
37
- "evalue": "Found array with 0 sample(s) (shape=(0, 92)) while a minimum of 1 is required by StandardScaler.",
38
- "output_type": "error",
39
- "traceback": [
40
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
41
- "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
42
- "Cell \u001b[0;32mIn[11], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m X, y \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_features_and_targets\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 2\u001b[0m \u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdataset\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNO2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mlag_days\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m6\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43msma_days\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m6\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mdays_ahead\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m3\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m)\u001b[49m\n",
43
- "File \u001b[0;32m~/Desktop/utrecht-pollution-prediction/data_loading.py:214\u001b[0m, in \u001b[0;36mcreate_features_and_targets\u001b[0;34m(data, target_particle, lag_days, sma_days, days_ahead)\u001b[0m\n\u001b[1;32m 211\u001b[0m target_scaler \u001b[38;5;241m=\u001b[39m StandardScaler()\n\u001b[1;32m 213\u001b[0m \u001b[38;5;66;03m# Fit the scalers on the training data\u001b[39;00m\n\u001b[0;32m--> 214\u001b[0m X_scaled \u001b[38;5;241m=\u001b[39m \u001b[43mfeature_scaler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit_transform\u001b[49m\u001b[43m(\u001b[49m\u001b[43mx\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 215\u001b[0m y_scaled \u001b[38;5;241m=\u001b[39m target_scaler\u001b[38;5;241m.\u001b[39mfit_transform(y)\n\u001b[1;32m 217\u001b[0m \u001b[38;5;66;03m# Convert scaled data back to DataFrame for consistency\u001b[39;00m\n",
44
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/sklearn/utils/_set_output.py:313\u001b[0m, in \u001b[0;36m_wrap_method_output.<locals>.wrapped\u001b[0;34m(self, X, *args, **kwargs)\u001b[0m\n\u001b[1;32m 311\u001b[0m \u001b[38;5;129m@wraps\u001b[39m(f)\n\u001b[1;32m 312\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mwrapped\u001b[39m(\u001b[38;5;28mself\u001b[39m, X, \u001b[38;5;241m*\u001b[39margs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs):\n\u001b[0;32m--> 313\u001b[0m data_to_wrap \u001b[38;5;241m=\u001b[39m \u001b[43mf\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 314\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(data_to_wrap, \u001b[38;5;28mtuple\u001b[39m):\n\u001b[1;32m 315\u001b[0m \u001b[38;5;66;03m# only wrap the first output for cross decomposition\u001b[39;00m\n\u001b[1;32m 316\u001b[0m return_tuple \u001b[38;5;241m=\u001b[39m (\n\u001b[1;32m 317\u001b[0m _wrap_data_with_container(method, data_to_wrap[\u001b[38;5;241m0\u001b[39m], X, \u001b[38;5;28mself\u001b[39m),\n\u001b[1;32m 318\u001b[0m \u001b[38;5;241m*\u001b[39mdata_to_wrap[\u001b[38;5;241m1\u001b[39m:],\n\u001b[1;32m 319\u001b[0m )\n",
45
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/sklearn/base.py:1098\u001b[0m, in \u001b[0;36mTransformerMixin.fit_transform\u001b[0;34m(self, X, y, **fit_params)\u001b[0m\n\u001b[1;32m 1083\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1084\u001b[0m (\n\u001b[1;32m 1085\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThis object (\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) has a `transform`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;167;01mUserWarning\u001b[39;00m,\n\u001b[1;32m 1094\u001b[0m )\n\u001b[1;32m 1096\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m y \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1097\u001b[0m \u001b[38;5;66;03m# fit method of arity 1 (unsupervised transformation)\u001b[39;00m\n\u001b[0;32m-> 1098\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mfit_params\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39mtransform(X)\n\u001b[1;32m 1099\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 1100\u001b[0m \u001b[38;5;66;03m# fit method of arity 2 (supervised transformation)\u001b[39;00m\n\u001b[1;32m 1101\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mfit(X, y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mfit_params)\u001b[38;5;241m.\u001b[39mtransform(X)\n",
46
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/sklearn/preprocessing/_data.py:878\u001b[0m, in \u001b[0;36mStandardScaler.fit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 876\u001b[0m \u001b[38;5;66;03m# Reset internal state before fitting\u001b[39;00m\n\u001b[1;32m 877\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_reset()\n\u001b[0;32m--> 878\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mpartial_fit\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43my\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43msample_weight\u001b[49m\u001b[43m)\u001b[49m\n",
47
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/sklearn/base.py:1473\u001b[0m, in \u001b[0;36m_fit_context.<locals>.decorator.<locals>.wrapper\u001b[0;34m(estimator, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1466\u001b[0m estimator\u001b[38;5;241m.\u001b[39m_validate_params()\n\u001b[1;32m 1468\u001b[0m \u001b[38;5;28;01mwith\u001b[39;00m config_context(\n\u001b[1;32m 1469\u001b[0m skip_parameter_validation\u001b[38;5;241m=\u001b[39m(\n\u001b[1;32m 1470\u001b[0m prefer_skip_nested_validation \u001b[38;5;129;01mor\u001b[39;00m global_skip_validation\n\u001b[1;32m 1471\u001b[0m )\n\u001b[1;32m 1472\u001b[0m ):\n\u001b[0;32m-> 1473\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfit_method\u001b[49m\u001b[43m(\u001b[49m\u001b[43mestimator\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n",
48
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/sklearn/preprocessing/_data.py:914\u001b[0m, in \u001b[0;36mStandardScaler.partial_fit\u001b[0;34m(self, X, y, sample_weight)\u001b[0m\n\u001b[1;32m 882\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"Online computation of mean and std on X for later scaling.\u001b[39;00m\n\u001b[1;32m 883\u001b[0m \n\u001b[1;32m 884\u001b[0m \u001b[38;5;124;03mAll of X is processed as a single batch. This is intended for cases\u001b[39;00m\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 911\u001b[0m \u001b[38;5;124;03m Fitted scaler.\u001b[39;00m\n\u001b[1;32m 912\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 913\u001b[0m first_call \u001b[38;5;241m=\u001b[39m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(\u001b[38;5;28mself\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mn_samples_seen_\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 914\u001b[0m X \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_validate_data\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 915\u001b[0m \u001b[43m \u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 916\u001b[0m \u001b[43m \u001b[49m\u001b[43maccept_sparse\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcsr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mcsc\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 917\u001b[0m \u001b[43m \u001b[49m\u001b[43mdtype\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mFLOAT_DTYPES\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 918\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mforce_all_finite\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mallow-nan\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 919\u001b[0m \u001b[43m \u001b[49m\u001b[43mreset\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mfirst_call\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 920\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 921\u001b[0m n_features \u001b[38;5;241m=\u001b[39m X\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m 923\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m sample_weight \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n",
49
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/sklearn/base.py:633\u001b[0m, in \u001b[0;36mBaseEstimator._validate_data\u001b[0;34m(self, X, y, reset, validate_separately, cast_to_ndarray, **check_params)\u001b[0m\n\u001b[1;32m 631\u001b[0m out \u001b[38;5;241m=\u001b[39m X, y\n\u001b[1;32m 632\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m no_val_y:\n\u001b[0;32m--> 633\u001b[0m out \u001b[38;5;241m=\u001b[39m \u001b[43mcheck_array\u001b[49m\u001b[43m(\u001b[49m\u001b[43mX\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43minput_name\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mX\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mcheck_params\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 634\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m no_val_X \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m no_val_y:\n\u001b[1;32m 635\u001b[0m out \u001b[38;5;241m=\u001b[39m _check_y(y, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mcheck_params)\n",
50
- "File \u001b[0;32m~/anaconda3/envs/ml-industry/lib/python3.12/site-packages/sklearn/utils/validation.py:1087\u001b[0m, in \u001b[0;36mcheck_array\u001b[0;34m(array, accept_sparse, accept_large_sparse, dtype, order, copy, force_writeable, force_all_finite, ensure_2d, allow_nd, ensure_min_samples, ensure_min_features, estimator, input_name)\u001b[0m\n\u001b[1;32m 1085\u001b[0m n_samples \u001b[38;5;241m=\u001b[39m _num_samples(array)\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m n_samples \u001b[38;5;241m<\u001b[39m ensure_min_samples:\n\u001b[0;32m-> 1087\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mFound array with \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m sample(s) (shape=\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m) while a\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m minimum of \u001b[39m\u001b[38;5;132;01m%d\u001b[39;00m\u001b[38;5;124m is required\u001b[39m\u001b[38;5;132;01m%s\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1090\u001b[0m \u001b[38;5;241m%\u001b[39m (n_samples, array\u001b[38;5;241m.\u001b[39mshape, ensure_min_samples, context)\n\u001b[1;32m 1091\u001b[0m )\n\u001b[1;32m 1093\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m ensure_min_features \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m array\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m2\u001b[39m:\n\u001b[1;32m 1094\u001b[0m n_features \u001b[38;5;241m=\u001b[39m array\u001b[38;5;241m.\u001b[39mshape[\u001b[38;5;241m1\u001b[39m]\n",
51
- "\u001b[0;31mValueError\u001b[0m: Found array with 0 sample(s) (shape=(0, 92)) while a minimum of 1 is required by StandardScaler."
52
- ]
53
  }
54
  ],
55
  "source": [
@@ -61,6 +42,126 @@
61
  " days_ahead=3,\n",
62
  ")"
63
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  }
65
  ],
66
  "metadata": {
 
13
  },
14
  {
15
  "cell_type": "code",
16
+ "execution_count": 2,
17
  "metadata": {},
18
  "outputs": [],
19
  "source": [
 
22
  },
23
  {
24
  "cell_type": "code",
25
+ "execution_count": 3,
26
  "metadata": {},
27
  "outputs": [
28
  {
 
31
  "text": [
32
  "Number of rows with missing values dropped: 7\n"
33
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  }
35
  ],
36
  "source": [
 
42
  " days_ahead=3,\n",
43
  ")"
44
  ]
45
+ },
46
+ {
47
+ "cell_type": "code",
48
+ "execution_count": 5,
49
+ "metadata": {},
50
+ "outputs": [
51
+ {
52
+ "data": {
53
+ "text/html": [
54
+ "<div>\n",
55
+ "<style scoped>\n",
56
+ " .dataframe tbody tr th:only-of-type {\n",
57
+ " vertical-align: middle;\n",
58
+ " }\n",
59
+ "\n",
60
+ " .dataframe tbody tr th {\n",
61
+ " vertical-align: top;\n",
62
+ " }\n",
63
+ "\n",
64
+ " .dataframe thead th {\n",
65
+ " text-align: right;\n",
66
+ " }\n",
67
+ "</style>\n",
68
+ "<table border=\"1\" class=\"dataframe\">\n",
69
+ " <thead>\n",
70
+ " <tr style=\"text-align: right;\">\n",
71
+ " <th></th>\n",
72
+ " <th>NO2</th>\n",
73
+ " <th>O3</th>\n",
74
+ " <th>wind_speed</th>\n",
75
+ " <th>mean_temp</th>\n",
76
+ " <th>global_radiation</th>\n",
77
+ " <th>percipitation</th>\n",
78
+ " <th>pressure</th>\n",
79
+ " <th>minimum_visibility</th>\n",
80
+ " <th>humidity</th>\n",
81
+ " <th>weekday_sin</th>\n",
82
+ " <th>...</th>\n",
83
+ " <th>O3_last_year_4_days_before</th>\n",
84
+ " <th>NO2_last_year_4_days_before</th>\n",
85
+ " <th>O3_last_year_5_days_before</th>\n",
86
+ " <th>NO2_last_year_5_days_before</th>\n",
87
+ " <th>O3_last_year_6_days_before</th>\n",
88
+ " <th>NO2_last_year_6_days_before</th>\n",
89
+ " <th>O3_last_year_7_days_before</th>\n",
90
+ " <th>NO2_last_year_7_days_before</th>\n",
91
+ " <th>O3_last_year_3_days_after</th>\n",
92
+ " <th>NO2_last_year_3_days_after</th>\n",
93
+ " </tr>\n",
94
+ " </thead>\n",
95
+ " <tbody>\n",
96
+ " <tr>\n",
97
+ " <th>0</th>\n",
98
+ " <td>20.281667</td>\n",
99
+ " <td>25.787521</td>\n",
100
+ " <td>76</td>\n",
101
+ " <td>121</td>\n",
102
+ " <td>54</td>\n",
103
+ " <td>97</td>\n",
104
+ " <td>10266</td>\n",
105
+ " <td>116</td>\n",
106
+ " <td>87</td>\n",
107
+ " <td>0.781831</td>\n",
108
+ " <td>...</td>\n",
109
+ " <td>0</td>\n",
110
+ " <td>0</td>\n",
111
+ " <td>0</td>\n",
112
+ " <td>0</td>\n",
113
+ " <td>0</td>\n",
114
+ " <td>0</td>\n",
115
+ " <td>0</td>\n",
116
+ " <td>0</td>\n",
117
+ " <td>0</td>\n",
118
+ " <td>0</td>\n",
119
+ " </tr>\n",
120
+ " </tbody>\n",
121
+ "</table>\n",
122
+ "<p>1 rows × 103 columns</p>\n",
123
+ "</div>"
124
+ ],
125
+ "text/plain": [
126
+ " NO2 O3 wind_speed mean_temp global_radiation \\\n",
127
+ "0 20.281667 25.787521 76 121 54 \n",
128
+ "\n",
129
+ " percipitation pressure minimum_visibility humidity weekday_sin ... \\\n",
130
+ "0 97 10266 116 87 0.781831 ... \n",
131
+ "\n",
132
+ " O3_last_year_4_days_before NO2_last_year_4_days_before \\\n",
133
+ "0 0 0 \n",
134
+ "\n",
135
+ " O3_last_year_5_days_before NO2_last_year_5_days_before \\\n",
136
+ "0 0 0 \n",
137
+ "\n",
138
+ " O3_last_year_6_days_before NO2_last_year_6_days_before \\\n",
139
+ "0 0 0 \n",
140
+ "\n",
141
+ " O3_last_year_7_days_before NO2_last_year_7_days_before \\\n",
142
+ "0 0 0 \n",
143
+ "\n",
144
+ " O3_last_year_3_days_after NO2_last_year_3_days_after \n",
145
+ "0 0 0 \n",
146
+ "\n",
147
+ "[1 rows x 103 columns]"
148
+ ]
149
+ },
150
+ "execution_count": 5,
151
+ "metadata": {},
152
+ "output_type": "execute_result"
153
+ }
154
+ ],
155
+ "source": [
156
+ "test_data"
157
+ ]
158
+ },
159
+ {
160
+ "cell_type": "code",
161
+ "execution_count": null,
162
+ "metadata": {},
163
+ "outputs": [],
164
+ "source": []
165
  }
166
  ],
167
  "metadata": {