elisaklunder
committed on
Commit
•
3dd6a8c
1
Parent(s):
1d3c9ee
stupid api
Browse files
- pollution_data.csv +2 -2
- scalers/feature_scaler_NO2.joblib +1 -1
- src/data_api_calls.py +16 -4
- src/features_pipeline.py +0 -1
- src/past_data_api_calls.py +2 -2
- src/predict.py +30 -7
- test.ipynb +12 -24
pollution_data.csv
CHANGED
@@ -2,8 +2,8 @@ date,NO2,O3
|
|
2 |
2024-10-17,22.804605103280675,22.769159859976643
|
3 |
2024-10-18,23.2685,23.30733245729302
|
4 |
2024-10-19,23.91006441223834,23.1717142857143
|
5 |
-
2024-10-20,22.
|
6 |
-
2024-10-21,21.1457004830918,24.
|
7 |
2024-10-22,21.776579804560274,23.33588571428572
|
8 |
2024-10-23,21.974793814433,22.21468879668051
|
9 |
2024-10-24,25.51256756756757,20.91370967741937
|
|
|
2 |
2024-10-17,22.804605103280675,22.769159859976643
|
3 |
2024-10-18,23.2685,23.30733245729302
|
4 |
2024-10-19,23.91006441223834,23.1717142857143
|
5 |
+
2024-10-20,22.57323754789273,23.53784452296821
|
6 |
+
2024-10-21,21.1457004830918,24.02069565217393
|
7 |
2024-10-22,21.776579804560274,23.33588571428572
|
8 |
2024-10-23,21.974793814433,22.21468879668051
|
9 |
2024-10-24,25.51256756756757,20.91370967741937
|
scalers/feature_scaler_NO2.joblib
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5791
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d2731707963125bbb452df55c91920c62eb745c7e78c0a79bdf7fab173b3369
|
3 |
size 5791
|
src/data_api_calls.py
CHANGED
@@ -142,11 +142,19 @@ def get_combined_data():
|
|
142 |
update_pollution_data()
|
143 |
|
144 |
weather_df = pd.read_csv(WEATHER_DATA_FILE)
|
145 |
-
pollution_df = pd.read_csv(POLLUTION_DATA_FILE)
|
146 |
|
147 |
-
|
148 |
-
|
149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
150 |
|
151 |
# Apply scaling and renaming similar to the scale function from previous code
|
152 |
combined_df = combined_df.rename(
|
@@ -177,5 +185,9 @@ def get_combined_data():
|
|
177 |
combined_df["pressure"] = combined_df["pressure"].astype(int)
|
178 |
combined_df["humidity"] = combined_df["humidity"].astype(int)
|
179 |
combined_df["global_radiation"] = combined_df["global_radiation"].astype(int)
|
|
|
|
|
|
|
|
|
180 |
|
181 |
return combined_df
|
|
|
142 |
update_pollution_data()
|
143 |
|
144 |
weather_df = pd.read_csv(WEATHER_DATA_FILE)
|
|
|
145 |
|
146 |
+
weather_df.insert(1, "NO2", None)
|
147 |
+
weather_df.insert(2, "O3", None)
|
148 |
+
weather_df.insert(10, "weekday", None)
|
149 |
+
columns = list(weather_df.columns)
|
150 |
+
columns.insert(3, columns.pop(6))
|
151 |
+
weather_df = weather_df[columns]
|
152 |
+
columns.insert(5, columns.pop(9))
|
153 |
+
weather_df = weather_df[columns]
|
154 |
+
columns.insert(9, columns.pop(6))
|
155 |
+
weather_df = weather_df[columns]
|
156 |
+
|
157 |
+
combined_df = weather_df
|
158 |
|
159 |
# Apply scaling and renaming similar to the scale function from previous code
|
160 |
combined_df = combined_df.rename(
|
|
|
185 |
combined_df["pressure"] = combined_df["pressure"].astype(int)
|
186 |
combined_df["humidity"] = combined_df["humidity"].astype(int)
|
187 |
combined_df["global_radiation"] = combined_df["global_radiation"].astype(int)
|
188 |
+
|
189 |
+
pollution_df = pd.read_csv(POLLUTION_DATA_FILE)
|
190 |
+
combined_df["NO2"] = pollution_df["NO2"]
|
191 |
+
combined_df["O3"] = pollution_df["O3"]
|
192 |
|
193 |
return combined_df
|
src/features_pipeline.py
CHANGED
@@ -3,7 +3,6 @@ import warnings
|
|
3 |
import joblib
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
6 |
-
|
7 |
from src.past_data_api_calls import get_past_combined_data
|
8 |
|
9 |
warnings.filterwarnings("ignore")
|
|
|
3 |
import joblib
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
|
|
6 |
from src.past_data_api_calls import get_past_combined_data
|
7 |
|
8 |
warnings.filterwarnings("ignore")
|
src/past_data_api_calls.py
CHANGED
@@ -15,8 +15,8 @@ PAST_POLLUTION_DATA_FILE = "pollution_data.csv"
|
|
15 |
|
16 |
def get_past_weather_data():
|
17 |
last_year_date = date.today() - timedelta(days=365)
|
18 |
-
start_date = (last_year_date - timedelta(days=
|
19 |
-
end_date = (last_year_date + timedelta(days=
|
20 |
|
21 |
try:
|
22 |
ResultBytes = urllib.request.urlopen(
|
|
|
15 |
|
16 |
def get_past_weather_data():
|
17 |
last_year_date = date.today() - timedelta(days=365)
|
18 |
+
start_date = (last_year_date - timedelta(days=8)).isoformat()
|
19 |
+
end_date = (last_year_date + timedelta(days=2)).isoformat()
|
20 |
|
21 |
try:
|
22 |
ResultBytes = urllib.request.urlopen(
|
src/predict.py
CHANGED
@@ -1,15 +1,14 @@
|
|
1 |
import os
|
|
|
2 |
|
3 |
import joblib
|
4 |
-
import
|
5 |
from dotenv import load_dotenv
|
6 |
from huggingface_hub import hf_hub_download, login
|
7 |
-
|
8 |
from src.data_api_calls import get_combined_data
|
9 |
from src.features_pipeline import create_features
|
10 |
|
11 |
|
12 |
-
@st.cache_resource()
|
13 |
def load_model(particle):
|
14 |
load_dotenv()
|
15 |
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
@@ -39,9 +38,33 @@ def get_data_and_predictions():
|
|
39 |
|
40 |
week_data = get_combined_data()
|
41 |
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
46 |
|
47 |
return week_data, o3_predictions, no2_predictions
|
|
|
1 |
import os
|
2 |
+
from datetime import date, timedelta
|
3 |
|
4 |
import joblib
|
5 |
+
import pandas as pd
|
6 |
from dotenv import load_dotenv
|
7 |
from huggingface_hub import hf_hub_download, login
|
|
|
8 |
from src.data_api_calls import get_combined_data
|
9 |
from src.features_pipeline import create_features
|
10 |
|
11 |
|
|
|
12 |
def load_model(particle):
|
13 |
load_dotenv()
|
14 |
login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))
|
|
|
38 |
|
39 |
week_data = get_combined_data()
|
40 |
|
41 |
+
o3_predictions = run_model("O3", data=week_data)
|
42 |
+
no2_predictions = run_model("NO2", data=week_data)
|
43 |
+
|
44 |
+
prediction_data = []
|
45 |
+
for i in range(3):
|
46 |
+
prediction_data.append(
|
47 |
+
{
|
48 |
+
"pollutant": "O3",
|
49 |
+
"date_predicted": date.today(),
|
50 |
+
"date": date.today() + timedelta(days=i + 1),
|
51 |
+
"prediction_value": o3_predictions[i],
|
52 |
+
}
|
53 |
+
)
|
54 |
+
prediction_data.append(
|
55 |
+
{
|
56 |
+
"pollutant": "NO2",
|
57 |
+
"date_predicted": date.today(),
|
58 |
+
"date": date.today() + timedelta(days=i + 1),
|
59 |
+
"prediction_value": no2_predictions[i],
|
60 |
+
}
|
61 |
+
)
|
62 |
+
|
63 |
+
predictions_df = pd.DataFrame(prediction_data)
|
64 |
+
|
65 |
+
if os.path.exists(PREDICTIONS_FILE):
|
66 |
+
predictions_df.to_csv(PREDICTIONS_FILE, mode="a", header=False, index=False)
|
67 |
+
else:
|
68 |
+
predictions_df.to_csv(PREDICTIONS_FILE, mode="w", header=True, index=False)
|
69 |
|
70 |
return week_data, o3_predictions, no2_predictions
|
test.ipynb
CHANGED
@@ -15,43 +15,31 @@
|
|
15 |
}
|
16 |
],
|
17 |
"source": [
|
18 |
-
"from src.data_api_calls import get_combined_data\n",
|
19 |
-
"from src.past_data_api_calls import get_past_combined_data\n",
|
20 |
"from src.predict import get_data_and_predictions"
|
21 |
]
|
22 |
},
|
23 |
-
{
|
24 |
-
"cell_type": "code",
|
25 |
-
"execution_count": null,
|
26 |
-
"metadata": {},
|
27 |
-
"outputs": [],
|
28 |
-
"source": [
|
29 |
-
"get_past_combined_data()"
|
30 |
-
]
|
31 |
-
},
|
32 |
{
|
33 |
"cell_type": "code",
|
34 |
"execution_count": 2,
|
35 |
"metadata": {},
|
36 |
"outputs": [
|
37 |
{
|
38 |
-
"ename": "
|
39 |
-
"evalue": "
|
40 |
"output_type": "error",
|
41 |
"traceback": [
|
42 |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
43 |
-
"\u001b[1;
|
44 |
"Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m week_data, predictions_O3, predictions_NO2 \u001b[38;5;241m=\u001b[39m \u001b[43mget_data_and_predictions\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
45 |
-
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\predict.py:
|
46 |
-
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\
|
47 |
-
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\
|
48 |
-
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\past_data_api_calls.py:
|
49 |
-
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\
|
50 |
-
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\
|
51 |
-
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\
|
52 |
-
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\
|
53 |
-
"
|
54 |
-
"\u001b[1;31mOSError\u001b[0m: [Errno 22] Invalid argument: 'NO2_2023-10-18T09:00:00Z.csv'"
|
55 |
]
|
56 |
}
|
57 |
],
|
|
|
15 |
}
|
16 |
],
|
17 |
"source": [
|
|
|
|
|
18 |
"from src.predict import get_data_and_predictions"
|
19 |
]
|
20 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
{
|
22 |
"cell_type": "code",
|
23 |
"execution_count": 2,
|
24 |
"metadata": {},
|
25 |
"outputs": [
|
26 |
{
|
27 |
+
"ename": "ValueError",
|
28 |
+
"evalue": "Length of values (0) does not match length of index (11)",
|
29 |
"output_type": "error",
|
30 |
"traceback": [
|
31 |
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
32 |
+
"\u001b[1;31mValueError\u001b[0m Traceback (most recent call last)",
|
33 |
"Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m week_data, predictions_O3, predictions_NO2 \u001b[38;5;241m=\u001b[39m \u001b[43mget_data_and_predictions\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n",
|
34 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\predict.py:41\u001b[0m, in \u001b[0;36mget_data_and_predictions\u001b[1;34m()\u001b[0m\n\u001b[0;32m 37\u001b[0m PREDICTIONS_FILE \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpredictions_history.csv\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 39\u001b[0m week_data \u001b[38;5;241m=\u001b[39m get_combined_data()\n\u001b[1;32m---> 41\u001b[0m o3_predictions \u001b[38;5;241m=\u001b[39m \u001b[43mrun_model\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mO3\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mweek_data\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 42\u001b[0m no2_predictions \u001b[38;5;241m=\u001b[39m run_model(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNO2\u001b[39m\u001b[38;5;124m\"\u001b[39m, data\u001b[38;5;241m=\u001b[39mweek_data)\n\u001b[0;32m 44\u001b[0m prediction_data \u001b[38;5;241m=\u001b[39m []\n",
|
35 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\predict.py:28\u001b[0m, in \u001b[0;36mrun_model\u001b[1;34m(particle, data)\u001b[0m\n\u001b[0;32m 27\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mrun_model\u001b[39m(particle, data):\n\u001b[1;32m---> 28\u001b[0m input_data \u001b[38;5;241m=\u001b[39m \u001b[43mcreate_features\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdata\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mdata\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtarget_particle\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mparticle\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 29\u001b[0m model \u001b[38;5;241m=\u001b[39m load_model(particle)\n\u001b[0;32m 30\u001b[0m prediction \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mpredict(input_data)\n",
|
36 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\features_pipeline.py:60\u001b[0m, in \u001b[0;36mcreate_features\u001b[1;34m(data, target_particle, lag_days, sma_days)\u001b[0m\n\u001b[0;32m 55\u001b[0m data[\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mfeature\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m_sma_\u001b[39m\u001b[38;5;132;01m{\u001b[39;00msma_days\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m (\n\u001b[0;32m 56\u001b[0m data[feature]\u001b[38;5;241m.\u001b[39mrolling(window\u001b[38;5;241m=\u001b[39msma_days)\u001b[38;5;241m.\u001b[39mmean()\n\u001b[0;32m 57\u001b[0m )\n\u001b[0;32m 59\u001b[0m \u001b[38;5;66;03m# Create particle data (NO2 and O3) from the same time last year\u001b[39;00m\n\u001b[1;32m---> 60\u001b[0m past_data \u001b[38;5;241m=\u001b[39m \u001b[43mget_past_combined_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 62\u001b[0m \u001b[38;5;66;03m# Today last year\u001b[39;00m\n\u001b[0;32m 63\u001b[0m data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3_last_year\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m past_data[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3\u001b[39m\u001b[38;5;124m\"\u001b[39m]\u001b[38;5;241m.\u001b[39miloc[\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m4\u001b[39m]\n",
|
37 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\past_data_api_calls.py:99\u001b[0m, in \u001b[0;36mget_past_combined_data\u001b[1;34m()\u001b[0m\n\u001b[0;32m 96\u001b[0m NO2_df, O3_df \u001b[38;5;241m=\u001b[39m get_past_pollution_data()\n\u001b[0;32m 98\u001b[0m combined_df \u001b[38;5;241m=\u001b[39m weather_df\n\u001b[1;32m---> 99\u001b[0m \u001b[43mcombined_df\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNO2\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m]\u001b[49m \u001b[38;5;241m=\u001b[39m NO2_df\n\u001b[0;32m 100\u001b[0m combined_df[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mO3\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m O3_df\n\u001b[0;32m 102\u001b[0m \u001b[38;5;66;03m# Apply scaling and renaming similar to the scale function from previous code\u001b[39;00m\n",
|
38 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:4311\u001b[0m, in \u001b[0;36mDataFrame.__setitem__\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 4308\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_setitem_array([key], value)\n\u001b[0;32m 4309\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 4310\u001b[0m \u001b[38;5;66;03m# set column\u001b[39;00m\n\u001b[1;32m-> 4311\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_set_item\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkey\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n",
|
39 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:4524\u001b[0m, in \u001b[0;36mDataFrame._set_item\u001b[1;34m(self, key, value)\u001b[0m\n\u001b[0;32m 4514\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21m_set_item\u001b[39m(\u001b[38;5;28mself\u001b[39m, key, value) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 4515\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 4516\u001b[0m \u001b[38;5;124;03m Add series to DataFrame in specified column.\u001b[39;00m\n\u001b[0;32m 4517\u001b[0m \n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 4522\u001b[0m \u001b[38;5;124;03m ensure homogeneity.\u001b[39;00m\n\u001b[0;32m 4523\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[1;32m-> 4524\u001b[0m value, refs \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_sanitize_column\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 4526\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 4527\u001b[0m key \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\n\u001b[0;32m 4528\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m value\u001b[38;5;241m.\u001b[39mndim \u001b[38;5;241m==\u001b[39m \u001b[38;5;241m1\u001b[39m\n\u001b[0;32m 4529\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value\u001b[38;5;241m.\u001b[39mdtype, ExtensionDtype)\n\u001b[0;32m 4530\u001b[0m ):\n\u001b[0;32m 4531\u001b[0m \u001b[38;5;66;03m# broadcast across multiple columns if necessary\u001b[39;00m\n\u001b[0;32m 4532\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m 
\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns\u001b[38;5;241m.\u001b[39mis_unique \u001b[38;5;129;01mor\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(\u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mcolumns, MultiIndex):\n",
|
40 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:5266\u001b[0m, in \u001b[0;36mDataFrame._sanitize_column\u001b[1;34m(self, value)\u001b[0m\n\u001b[0;32m 5263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m _reindex_for_setitem(value, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex)\n\u001b[0;32m 5265\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m is_list_like(value):\n\u001b[1;32m-> 5266\u001b[0m \u001b[43mcom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequire_length_match\u001b[49m\u001b[43m(\u001b[49m\u001b[43mvalue\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mindex\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 5267\u001b[0m arr \u001b[38;5;241m=\u001b[39m sanitize_array(value, \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mindex, copy\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m, allow_2d\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 5268\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m (\n\u001b[0;32m 5269\u001b[0m \u001b[38;5;28misinstance\u001b[39m(value, Index)\n\u001b[0;32m 5270\u001b[0m \u001b[38;5;129;01mand\u001b[39;00m value\u001b[38;5;241m.\u001b[39mdtype \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mobject\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m (...)\u001b[0m\n\u001b[0;32m 5273\u001b[0m \u001b[38;5;66;03m# TODO: Remove kludge in sanitize_array for string mode when enforcing\u001b[39;00m\n\u001b[0;32m 5274\u001b[0m \u001b[38;5;66;03m# this deprecation\u001b[39;00m\n",
|
41 |
+
"File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\common.py:573\u001b[0m, in \u001b[0;36mrequire_length_match\u001b[1;34m(data, index)\u001b[0m\n\u001b[0;32m 569\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 570\u001b[0m \u001b[38;5;124;03mCheck the length of data matches the length of the index.\u001b[39;00m\n\u001b[0;32m 571\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[0;32m 572\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mlen\u001b[39m(data) \u001b[38;5;241m!=\u001b[39m \u001b[38;5;28mlen\u001b[39m(index):\n\u001b[1;32m--> 573\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[0;32m 574\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mLength of values \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 575\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(data)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m) \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 576\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdoes not match length of index \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 577\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m(\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mlen\u001b[39m(index)\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 578\u001b[0m )\n",
|
42 |
+
"\u001b[1;31mValueError\u001b[0m: Length of values (0) does not match length of index (11)"
|
|
|
43 |
]
|
44 |
}
|
45 |
],
|