elisaklunder committed on
Commit
e2ebde2
·
1 Parent(s): 94c13a3

admin panel

Browse files
adjusted_predictions_O3_NO2.csv ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pollutant,date_predicted,date,prediction_value
2
+ O3,2024-10-14,2024-10-17,37.50407
3
+ NO2,2024-10-14,2024-10-17,31.70604
4
+ O3,2024-10-15,2024-10-17,24.20011
5
+ NO2,2024-10-15,2024-10-17,31.45461
6
+ O3,2024-10-16,2024-10-17,9.6575
7
+ NO2,2024-10-16,2024-10-17,17.0652
8
+ O3,2024-10-15,2024-10-18,7.87344
9
+ NO2,2024-10-15,2024-10-18,31.73243
10
+ O3,2024-10-16,2024-10-18,21.76064
11
+ NO2,2024-10-16,2024-10-18,40.0994
12
+ O3,2024-10-17,2024-10-18,16.0884
13
+ NO2,2024-10-17,2024-10-18,32.0458
14
+ O3,2024-10-16,2024-10-19,28.83768
15
+ NO2,2024-10-16,2024-10-19,24.10068
16
+ O3,2024-10-17,2024-10-19,23.13454
17
+ NO2,2024-10-17,2024-10-19,29.79229
18
+ O3,2024-10-17,2024-10-19,22.53339
19
+ NO2,2024-10-17,2024-10-19,26.22730
20
+ O3,2024-10-18,2024-10-19,22.3045
21
+ NO2,2024-10-18,2024-10-19,20.8002
22
+ O3,2024-10-18,2024-10-20,34.37874
23
+ NO2,2024-10-18,2024-10-20,32.70553
24
+ O3,2024-10-18,2024-10-20,31.54536
25
+ NO2,2024-10-18,2024-10-20,38.55104
26
+ O3,2024-10-19,2024-10-20,26.4217
27
+ NO2,2024-10-19,2024-10-20,27.3999
28
+ O3,2024-10-19,2024-10-20,17.0652
29
+ NO2,2024-10-19,2024-10-20,18.9924
30
+ O3,2024-10-19,2024-10-21,19.13648
31
+ NO2,2024-10-19,2024-10-21,25.13566
32
+ O3,2024-10-19,2024-10-21,24.20011
33
+ NO2,2024-10-19,2024-10-21,20.09909
34
+ O3,2024-10-20,2024-10-21,29.0094
35
+ NO2,2024-10-20,2024-10-21,19.5074
36
+ O3,2024-10-20,2024-10-21,20.0621
37
+ NO2,2024-10-20,2024-10-21,23.6575
38
+ O3,2024-10-20,2024-10-22,19.24714
39
+ NO2,2024-10-20,2024-10-22,31.45461
40
+ O3,2024-10-21,2024-10-22,16.52
41
+ NO2,2024-10-21,2024-10-22,30.1924
42
+ O3,2024-10-21,2024-10-23,31.01934
43
+ NO2,2024-10-21,2024-10-23,19.74775
44
+ O3,2024-10-21,2024-10-23,17.70241
45
+ NO2,2024-10-21,2024-10-23,27.73936
46
+ O3,2024-10-22,2024-10-24,25.96605
47
+ NO2,2024-10-22,2024-10-24,31.90517
48
+ O3,2024-10-22,2024-10-24,29.22139
49
+ NO2,2024-10-22,2024-10-24,22.16907
50
+ O3,2024-10-23,2024-10-24,9.0905
51
+ NO2,2024-10-23,2024-10-24,18.1749
52
+ O3,2024-10-23,2024-10-24,20.205
53
+ NO2,2024-10-23,2024-10-24,27.3349
54
+ O3,2024-10-24,2024-10-25,9.090494810363
55
+ NO2,2024-10-24,2024-10-25,18.174884683860732
56
+ O3,2024-10-24,2024-10-26,20.2050211414084
57
+ NO2,2024-10-24,2024-10-26,27.334877510848962
58
+ O3,2024-10-24,2024-10-27,16.31643675723712
59
+ NO2,2024-10-24,2024-10-27,23.015396915310056
app.py CHANGED
@@ -185,3 +185,4 @@ with col2:
185
  ),
186
  )
187
  st.plotly_chart(fig_no2, key="fig_no2")
 
 
185
  ),
186
  )
187
  st.plotly_chart(fig_no2, key="fig_no2")
188
+
pages/admin.py CHANGED
@@ -1,63 +1,196 @@
1
  import numpy as np
2
  import pandas as pd
 
3
  import streamlit as st
 
4
 
5
- USERNAME = "admin"
6
  PASSWORD = "password"
7
 
8
  st.title("Admin Panel")
9
 
 
 
 
 
10
  # Login Form
11
- login_success = False
12
- with st.form("login_form"):
13
- st.write("Please login to access the admin dashboard:")
14
- username = st.text_input("Username")
15
- password = st.text_input("Password", type="password")
16
- login_button = st.form_submit_button("Login")
17
-
18
- if login_button:
19
- if username == USERNAME and password == PASSWORD:
20
- login_success = True
21
- st.success("Login successful!")
22
- else:
23
- st.error("Invalid username or password.")
24
-
25
- # After successful login
26
- if login_success:
27
- # Display information about model performance
28
- st.header("Model Performance Metrics")
29
-
30
- model_r2_score = 0.85 # Mock R^2 Score
31
- avg_prediction_time = 0.15 # Mock Average Prediction Time in seconds
32
- num_predictions_made = 2000 # Mock Number of Predictions Made
33
-
34
- st.metric(label="R² Score", value=f"{model_r2_score:.2f}")
35
- st.metric(
36
- label="Average Prediction Time", value=f"{avg_prediction_time:.2f} seconds"
 
37
  )
38
- st.metric(label="Total Predictions Made", value=num_predictions_made)
39
 
40
- st.subheader("Detailed Metrics")
41
- detailed_metrics = pd.DataFrame(
42
- {
43
- "Metric": ["MAE", "MSE", "RMSE", "Training Time"],
44
- "Value": [2.5, 3.4, 1.8, "1.2 hours"],
45
- }
 
 
 
 
 
 
 
 
 
 
 
46
  )
47
- st.table(detailed_metrics)
48
-
49
- # Mocking prediction latency over time (example chart)
50
- st.subheader("Prediction Latency Over Time")
51
- latency_data = pd.DataFrame(
52
- {
53
- "Date": pd.date_range(end=pd.Timestamp.today(), periods=7).to_list(),
54
- "Prediction Time (s)": np.random.uniform(0.1, 0.5, 7),
55
- }
56
  )
57
- st.line_chart(latency_data.set_index("Date"))
58
 
59
- # Button to simulate refreshing metrics
60
- if st.button("Refresh Metrics"):
61
- st.experimental_rerun()
62
- else:
63
- st.warning("Please login to access the admin panel.")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import numpy as np
2
  import pandas as pd
3
+ import plotly.graph_objects as go
4
  import streamlit as st
5
+ from sklearn.metrics import mean_squared_error
6
 
7
+ USERNAME = "dragonkiller"
8
  PASSWORD = "password"
9
 
10
  st.title("Admin Panel")
11
 
12
+ # Use session state to remember login state
13
+ if "login_success" not in st.session_state:
14
+ st.session_state.login_success = False
15
+
16
  # Login Form
17
+ if not st.session_state.login_success:
18
+ with st.form("login_form"):
19
+ st.write("Please login to access the admin dashboard:")
20
+ username = st.text_input("Username")
21
+ password = st.text_input("Password", type="password")
22
+ login_button = st.form_submit_button("Login")
23
+
24
+ if login_button:
25
+ if username == USERNAME and password == PASSWORD:
26
+ st.session_state.login_success = True
27
+ st.success("Login successful!")
28
+ else:
29
+ st.error("Invalid username or password.")
30
+ else:
31
+ # Actual data vs 1,2,3 days ahead predictions
32
+ actual_data = pd.read_csv("pollution_data.csv")
33
+ prediction_data = pd.read_csv("prediction_history.csv")
34
+
35
+ col1, col2 = st.columns(2)
36
+ with col1:
37
+ pollutant = st.radio("Select a pollutant", ("O3", "NO2"))
38
+ with col2:
39
+ days_ahead = st.radio("Select days ahead for prediction", (1, 2, 3))
40
+
41
+ predictions = prediction_data[prediction_data["pollutant"] == pollutant]
42
+ actual = actual_data[["date", pollutant]].rename(
43
+ columns={pollutant: "actual_value"}
44
  )
 
45
 
46
+ predictions_filtered = predictions[
47
+ predictions["date_predicted"]
48
+ == (
49
+ pd.to_datetime(predictions["date"]) - pd.Timedelta(days=days_ahead)
50
+ ).dt.strftime("%Y-%m-%d")
51
+ ]
52
+
53
+ fig = go.Figure()
54
+
55
+ fig.add_trace(
56
+ go.Scatter(
57
+ x=actual["date"],
58
+ y=actual["actual_value"],
59
+ mode="lines+markers",
60
+ name="Ground Truth",
61
+ line=dict(color="green", width=3),
62
+ )
63
  )
64
+
65
+ fig.add_trace(
66
+ go.Scatter(
67
+ x=predictions_filtered["date"],
68
+ y=predictions_filtered["prediction_value"],
69
+ mode="lines+markers",
70
+ name=f"Prediction {days_ahead} day(s) ahead",
71
+ line=dict(dash="dash", color="orange", width=3),
72
+ )
73
  )
 
74
 
75
+ fig.update_layout(
76
+ title=f"{pollutant} Predictions vs Actual Values",
77
+ xaxis_title="Date",
78
+ yaxis_title=f"{pollutant} Concentration",
79
+ legend=dict(x=0, y=1),
80
+ yaxis=dict(range=[0, 60]),
81
+ template="plotly_white",
82
+ xaxis=dict(
83
+ title="Date",
84
+ type="date",
85
+ tickmode="array",
86
+ tickvals=predictions["date"],
87
+ tickformat="%d-%b",
88
+ tickangle=-45,
89
+ tickcolor="gray",
90
+ ),
91
+ )
92
+
93
+ st.plotly_chart(fig)
94
+
95
+ # Evaluation Function
96
+ def evaluate_predictions_all_days(actual, predictions):
97
+ rmse_values_all = {"O3": [], "NO2": []}
98
+ smape_values_all = {"O3": [], "NO2": []}
99
+
100
+ for pollutant in ["O3", "NO2"]:
101
+ predictions_pollutant = predictions[predictions["pollutant"] == pollutant]
102
+ actual_pollutant = actual[["date", pollutant]].rename(
103
+ columns={pollutant: "actual_value"}
104
+ )
105
+
106
+ # Calculate RMSE and SMAPE for each day (1st, 2nd, and 3rd)
107
+ for i in range(1, 4):
108
+ predictions_filtered = predictions_pollutant[
109
+ predictions_pollutant["date_predicted"]
110
+ == (
111
+ pd.to_datetime(predictions_pollutant["date"])
112
+ - pd.Timedelta(days=i)
113
+ ).dt.strftime("%Y-%m-%d")
114
+ ]
115
+ actual_filtered = actual_pollutant[
116
+ actual_pollutant["date"].isin(predictions_filtered["date"])
117
+ ]
118
+ merged = pd.merge(
119
+ actual_filtered,
120
+ predictions_filtered,
121
+ left_on="date",
122
+ right_on="date",
123
+ )
124
+
125
+ if not merged.empty:
126
+ actual_values = merged["actual_value"].values
127
+ prediction_values = merged["prediction_value"].values
128
+
129
+ rmse = np.sqrt(mean_squared_error(actual_values, prediction_values))
130
+ rmse_values_all[pollutant].append(rmse)
131
+ smape = (
132
+ 100
133
+ / len(actual_values)
134
+ * np.sum(
135
+ 2
136
+ * np.abs(prediction_values - actual_values)
137
+ / (np.abs(actual_values) + np.abs(prediction_values))
138
+ )
139
+ )
140
+ smape_values_all[pollutant].append(smape)
141
+
142
+ # Plot RMSE and SMAPE for both pollutants
143
+ fig_rmse = go.Figure()
144
+ for day in range(3):
145
+ fig_rmse.add_trace(
146
+ go.Bar(
147
+ x=["O3", "NO2"],
148
+ y=[rmse_values_all["O3"][day], rmse_values_all["NO2"][day]],
149
+ name=f"Day {day + 1}",
150
+ )
151
+ )
152
+ fig_rmse.update_layout(
153
+ title="RMSE for Predictions Over 3 Days",
154
+ yaxis_title="RMSE",
155
+ xaxis_title="Pollutant",
156
+ barmode="group",
157
+ )
158
+ st.plotly_chart(fig_rmse)
159
+
160
+ fig_smape = go.Figure()
161
+ for day in range(3):
162
+ fig_smape.add_trace(
163
+ go.Bar(
164
+ x=["O3", "NO2"],
165
+ y=[smape_values_all["O3"][day], smape_values_all["NO2"][day]],
166
+ name=f"Day {day + 1}",
167
+ )
168
+ )
169
+ fig_smape.update_layout(
170
+ title="SMAPE for Predictions Over 3 Days",
171
+ yaxis_title="SMAPE (%)",
172
+ xaxis_title="Pollutant",
173
+ barmode="group",
174
+ )
175
+ st.plotly_chart(fig_smape)
176
+
177
+ # Average SMAPE and RMSE over the per-horizon values (divide by the list length, not the dict's key count)
178
+ total_O3_smape = sum(smape_values_all["O3"]) / len(smape_values_all["O3"])
179
+ total_NO2_smape = sum(smape_values_all["NO2"]) / len(smape_values_all["NO2"])
180
+ total_O3_rmse = sum(rmse_values_all["O3"]) / len(rmse_values_all["O3"])
181
+ total_NO2_rmse = sum(rmse_values_all["NO2"]) / len(rmse_values_all["NO2"])
182
+
183
+ # Display metrics table
184
+ metrics_data = {
185
+ "Metric": [
186
+ "Current NO2 SMAPE (%)",
187
+ "Current NO2 RMSE (µg/m3)",
188
+ "Current O3 SMAPE (%)",
189
+ "Current O3 RMSE (µg/m3)",
190
+ ],
191
+ "Value": [total_NO2_smape, total_NO2_rmse, total_O3_smape, total_O3_rmse],
192
+ }
193
+ metrics_df = pd.DataFrame(metrics_data)
194
+ st.table(metrics_df)
195
+
196
+ evaluate_predictions_all_days(actual_data, prediction_data)
prediction_history.csv ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ pollutant,date_predicted,date,prediction_value
2
+ O3,2024-10-14,2024-10-17,31.253351852448926
3
+ NO2,2024-10-14,2024-10-17,26.421736787446267
4
+ O3,2024-10-15,2024-10-17,22.000057677604474
5
+ NO2,2024-10-15,2024-10-17,28.59511317503212
6
+ O3,2024-10-16,2024-10-17,9.657466070999735
7
+ NO2,2024-10-16,2024-10-17,17.065168790519902
8
+ O3,2024-10-15,2024-10-18,6.561248
9
+ NO2,2024-10-15,2024-10-18,26.443672
10
+ O3,2024-10-16,2024-10-18,19.782418
11
+ NO2,2024-10-16,2024-10-18,36.453956
12
+ O3,2024-10-17,2024-10-18,16.08841798553393
13
+ NO2,2024-10-17,2024-10-18,32.0458143607889
14
+ O3,2024-10-16,2024-10-19,24.031357603260783
15
+ NO2,2024-10-16,2024-10-19,20.083893955587914
16
+ O3,2024-10-17,2024-10-19,21.031357603260783
17
+ NO2,2024-10-17,2024-10-19,27.083893955587914
18
+ O3,2024-10-17,2024-10-20,20.484862479793236
19
+ NO2,2024-10-17,2024-10-20,23.84300578029378
20
+ O3,2024-10-18,2024-10-19,22.304547122637445
21
+ NO2,2024-10-18,2024-10-19,20.80017116560889
22
+ O3,2024-10-18,2024-10-20,31.253351852448926
23
+ NO2,2024-10-18,2024-10-20,29.732316066240582
24
+ O3,2024-10-18,2024-10-21,28.67755196805434
25
+ NO2,2024-10-18,2024-10-21,35.04638743773354
26
+ O3,2024-10-19,2024-10-20,26.421736787446267
27
+ NO2,2024-10-19,2024-10-20,27.399885723190767
28
+ O3,2024-10-19,2024-10-21,17.065168790519902
29
+ NO2,2024-10-19,2024-10-21,18.992352714813563
30
+ O3,2024-10-19,2024-10-22,17.39682962048955
31
+ NO2,2024-10-19,2024-10-22,22.850616758859076
32
+ O3,2024-10-20,2024-10-21,22.000057677604474
33
+ NO2,2024-10-20,2024-10-21,18.27191592927812
34
+ O3,2024-10-20,2024-10-22,29.00940466937953
35
+ NO2,2024-10-20,2024-10-22,19.507397669634972
36
+ O3,2024-10-20,2024-10-23,20.062134354543346
37
+ NO2,2024-10-20,2024-10-23,23.657466070999735
38
+ O3,2024-10-21,2024-10-22,17.497382318189132
39
+ NO2,2024-10-21,2024-10-22,28.59511317503212
40
+ O3,2024-10-21,2024-10-23,16.519952190354232
41
+ NO2,2024-10-21,2024-10-23,30.192389708351822
42
+ O3,2024-10-21,2024-10-24,28.199403851129034
43
+ NO2,2024-10-21,2024-10-24,17.9525039623211
44
+ O3,2024-10-22,2024-10-23,16.093074246425157
45
+ NO2,2024-10-22,2024-10-23,25.217639978187005
46
+ O3,2024-10-22,2024-10-24,23.605545201596552
47
+ NO2,2024-10-22,2024-10-24,29.004701753536988
48
+ O3,2024-10-23,2024-10-24,26.56486295059828
49
+ NO2,2024-10-23,2024-10-24,20.153737337472574
50
+ O3,2024-10-24,2024-10-25,9.090494810363
51
+ NO2,2024-10-24,2024-10-25,18.174884683860732
52
+ O3,2024-10-24,2024-10-26,20.2050211414084
53
+ NO2,2024-10-24,2024-10-26,27.334877510848962
54
+ O3,2024-10-24,2024-10-27,16.31643675723712
55
+ NO2,2024-10-24,2024-10-27,23.015396915310056
predictions_history.csv CHANGED
@@ -5,9 +5,3 @@ O3,2024-10-24,2024-10-26,16.000984317626852
5
  NO2,2024-10-24,2024-10-26,25.760307451092384
6
  O3,2024-10-24,2024-10-27,19.64377495640328
7
  NO2,2024-10-24,2024-10-27,31.210576791105115
8
- O3,2024-10-24,2024-10-25,10.33808859423279
9
- NO2,2024-10-24,2024-10-25,25.68519991558237
10
- O3,2024-10-24,2024-10-26,16.000984317626852
11
- NO2,2024-10-24,2024-10-26,25.760307451092384
12
- O3,2024-10-24,2024-10-27,19.64377495640328
13
- NO2,2024-10-24,2024-10-27,31.210576791105115
 
5
  NO2,2024-10-24,2024-10-26,25.760307451092384
6
  O3,2024-10-24,2024-10-27,19.64377495640328
7
  NO2,2024-10-24,2024-10-27,31.210576791105115
 
 
 
 
 
 
src/helper_functions.py CHANGED
@@ -42,3 +42,4 @@ def pollution_box(label, value, delta):
42
  <p style="color: {'green' if '+' in delta else 'orange'}; margin: 0;">{delta}</p>
43
  </div>
44
  """, unsafe_allow_html=True)
 
 
42
  <p style="color: {'green' if '+' in delta else 'orange'}; margin: 0;">{delta}</p>
43
  </div>
44
  """, unsafe_allow_html=True)
45
+
src/predict.py CHANGED
@@ -47,7 +47,7 @@ def get_data_and_predictions():
47
  prediction_data.append(
48
  {
49
  "pollutant": "O3",
50
- "date_predicted": date.today(),
51
  "date": date.today() + timedelta(days=i + 1),
52
  "prediction_value": o3_predictions[0][i],
53
  }
@@ -55,7 +55,7 @@ def get_data_and_predictions():
55
  prediction_data.append(
56
  {
57
  "pollutant": "NO2",
58
- "date_predicted": date.today(),
59
  "date": date.today() + timedelta(days=i + 1),
60
  "prediction_value": no2_predictions[0][i],
61
  }
@@ -65,10 +65,11 @@ def get_data_and_predictions():
65
 
66
  if os.path.exists(PREDICTIONS_FILE):
67
  existing_data = pd.read_csv(PREDICTIONS_FILE)
 
 
 
 
68
  combined_data = pd.concat([existing_data, predictions_df])
69
- combined_data = combined_data.drop_duplicates(
70
- subset=["pollutant", "date_predicted", "date"], keep="first"
71
- )
72
  else:
73
  combined_data = predictions_df
74
 
 
47
  prediction_data.append(
48
  {
49
  "pollutant": "O3",
50
+ "predicted_on": date.today(),
51
  "date": date.today() + timedelta(days=i + 1),
52
  "prediction_value": o3_predictions[0][i],
53
  }
 
55
  prediction_data.append(
56
  {
57
  "pollutant": "NO2",
58
+ "predicted_on": date.today(),
59
  "date": date.today() + timedelta(days=i + 1),
60
  "prediction_value": no2_predictions[0][i],
61
  }
 
65
 
66
  if os.path.exists(PREDICTIONS_FILE):
67
  existing_data = pd.read_csv(PREDICTIONS_FILE)
68
+ # Filter out predictions made today to avoid duplicates
69
+ existing_data = existing_data[
70
+ ~(existing_data["predicted_on"] == str(date.today()))
71
+ ]
72
  combined_data = pd.concat([existing_data, predictions_df])
 
 
 
73
  else:
74
  combined_data = predictions_df
75
 
test.ipynb CHANGED
@@ -267,6 +267,40 @@
267
  "source": [
268
  "predictions_NO2"
269
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
270
  }
271
  ],
272
  "metadata": {
 
267
  "source": [
268
  "predictions_NO2"
269
  ]
270
+ },
271
+ {
272
+ "cell_type": "code",
273
+ "execution_count": 1,
274
+ "metadata": {},
275
+ "outputs": [],
276
+ "source": [
277
+ "from src.data_api_calls import get_combined_data"
278
+ ]
279
+ },
280
+ {
281
+ "cell_type": "code",
282
+ "execution_count": 2,
283
+ "metadata": {},
284
+ "outputs": [
285
+ {
286
+ "ename": "TypeError",
287
+ "evalue": "'<' not supported between instances of 'Timestamp' and 'str'",
288
+ "output_type": "error",
289
+ "traceback": [
290
+ "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
291
+ "\u001b[1;31mTypeError\u001b[0m Traceback (most recent call last)",
292
+ "Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mget_combined_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2024-10-10\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
293
+ "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\data_api_calls.py:136\u001b[0m, in \u001b[0;36mget_combined_data\u001b[1;34m(input_date)\u001b[0m\n\u001b[0;32m 133\u001b[0m start_date \u001b[38;5;241m=\u001b[39m end_date \u001b[38;5;241m-\u001b[39m timedelta(\u001b[38;5;241m7\u001b[39m)\n\u001b[0;32m 135\u001b[0m update_weather_data(start_date, end_date)\n\u001b[1;32m--> 136\u001b[0m \u001b[43mupdate_pollution_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend_date\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 138\u001b[0m weather_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(WEATHER_DATA_FILE)\n\u001b[0;32m 140\u001b[0m weather_df\u001b[38;5;241m.\u001b[39minsert(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNO2\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n",
294
+ "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\data_api_calls.py:123\u001b[0m, in \u001b[0;36mupdate_pollution_data\u001b[1;34m(start_date, end_date)\u001b[0m\n\u001b[0;32m 121\u001b[0m updated_data \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat([existing_data, new_data], ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m 122\u001b[0m updated_data\u001b[38;5;241m.\u001b[39mdrop_duplicates(subset\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m\"\u001b[39m, keep\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlast\u001b[39m\u001b[38;5;124m\"\u001b[39m, inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m--> 123\u001b[0m updated_data \u001b[38;5;241m=\u001b[39m \u001b[43mupdated_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43mby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 124\u001b[0m updated_data\u001b[38;5;241m.\u001b[39mto_csv(POLLUTION_DATA_FILE, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
295
+ "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:7200\u001b[0m, in \u001b[0;36mDataFrame.sort_values\u001b[1;34m(self, by, axis, ascending, inplace, kind, na_position, ignore_index, key)\u001b[0m\n\u001b[0;32m 7197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(ascending, (\u001b[38;5;28mtuple\u001b[39m, \u001b[38;5;28mlist\u001b[39m)):\n\u001b[0;32m 7198\u001b[0m ascending \u001b[38;5;241m=\u001b[39m ascending[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m-> 7200\u001b[0m indexer \u001b[38;5;241m=\u001b[39m \u001b[43mnargsort\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m 7201\u001b[0m \u001b[43m \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkind\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkind\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mascending\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mascending\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_position\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\n\u001b[0;32m 7202\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 7203\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 7204\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inplace:\n",
296
+ "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\sorting.py:439\u001b[0m, in \u001b[0;36mnargsort\u001b[1;34m(items, kind, ascending, na_position, key, mask)\u001b[0m\n\u001b[0;32m 437\u001b[0m non_nans \u001b[38;5;241m=\u001b[39m non_nans[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m 438\u001b[0m non_nan_idx \u001b[38;5;241m=\u001b[39m non_nan_idx[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m--> 439\u001b[0m indexer \u001b[38;5;241m=\u001b[39m non_nan_idx[\u001b[43mnon_nans\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margsort\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkind\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkind\u001b[49m\u001b[43m)\u001b[49m]\n\u001b[0;32m 440\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ascending:\n\u001b[0;32m 441\u001b[0m indexer \u001b[38;5;241m=\u001b[39m indexer[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
297
+ "\u001b[1;31mTypeError\u001b[0m: '<' not supported between instances of 'Timestamp' and 'str'"
298
+ ]
299
+ }
300
+ ],
301
+ "source": [
302
+ "get_combined_data(\"2024-10-10\")"
303
+ ]
304
  }
305
  ],
306
  "metadata": {
weather_data.csv CHANGED
@@ -1,4 +1,18 @@
1
  date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  2024-10-17,16.9,86.0,0.6,18.4,1010.0,37.1,43.0
3
  2024-10-18,15.5,97.3,3.9,7.6,1014.0,4.5,42.9
4
  2024-10-19,14.7,89.9,1.6,14.8,1014.1,22.8,43.5
 
1
  date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
2
+ 2024-10-03,9.9,78.9,0.0,25.9,1020.0,44.0,81.8
3
+ 2024-10-04,7.7,86.2,0.0,11.2,1022.0,25.0,86.3
4
+ 2024-10-05,8.0,83.2,0.0,14.8,1019.0,26.0,76.5
5
+ 2024-10-06,10.0,82.7,0.0,25.9,1007.7,36.5,77.2
6
+ 2024-10-07,15.3,81.0,0.4,22.3,1000.8,37.5,65.6
7
+ 2024-10-08,16.0,83.4,0.6,18.4,997.4,40.0,55.6
8
+ 2024-10-09,14.2,88.3,1.0,22.3,990.1,37.2,38.2
9
+ 2024-10-10,12.3,83.6,1.0,18.4,997.3,28.2,52.5
10
+ 2024-10-11,8.5,84.4,1.0,14.8,1015.3,36.1,62.9
11
+ 2024-10-12,7.8,89.4,0.0,21.6,1011.3,25.5,49.3
12
+ 2024-10-13,10.5,75.8,3.7,18.4,1013.2,36.8,49.9
13
+ 2024-10-14,7.9,89.8,0.0,10.8,1018.3,24.3,44.1
14
+ 2024-10-15,8.7,86.9,0.0,18.4,1019.4,29.0,71.2
15
+ 2024-10-16,15.1,82.2,0.0,22.3,1010.3,35.8,40.8
16
  2024-10-17,16.9,86.0,0.6,18.4,1010.0,37.1,43.0
17
  2024-10-18,15.5,97.3,3.9,7.6,1014.0,4.5,42.9
18
  2024-10-19,14.7,89.9,1.6,14.8,1014.1,22.8,43.5