Spaces:

Mihkelmj
/

utrecht-pollution-prediction

Sleeping

App Files Files Community

elisaklunder committed on Oct 24, 2024

Commit

e2ebde2

1 Parent(s): 94c13a3

admin panel

Browse files

Files changed (9) hide show

adjusted_predictions_O3_NO2.csv +59 -0
app.py +1 -0
pages/admin.py +182 -49
prediction_history.csv +55 -0
predictions_history.csv +0 -6
src/helper_functions.py +1 -0
src/predict.py +6 -5
test.ipynb +34 -0
weather_data.csv +14 -0

adjusted_predictions_O3_NO2.csv ADDED Viewed

	@@ -0,0 +1,59 @@

+pollutant,date_predicted,date,prediction_value
+O3,2024-10-14,2024-10-17,37.50407
+NO2,2024-10-14,2024-10-17,31.70604
+O3,2024-10-15,2024-10-17,24.20011
+NO2,2024-10-15,2024-10-17,31.45461
+O3,2024-10-16,2024-10-17,9.6575
+NO2,2024-10-16,2024-10-17,17.0652
+O3,2024-10-15,2024-10-18,7.87344
+NO2,2024-10-15,2024-10-18,31.73243
+O3,2024-10-16,2024-10-18,21.76064
+NO2,2024-10-16,2024-10-18,40.0994
+O3,2024-10-17,2024-10-18,16.0884
+NO2,2024-10-17,2024-10-18,32.0458
+O3,2024-10-16,2024-10-19,28.83768
+NO2,2024-10-16,2024-10-19,24.10068
+O3,2024-10-17,2024-10-19,23.13454
+NO2,2024-10-17,2024-10-19,29.79229
+O3,2024-10-17,2024-10-19,22.53339
+NO2,2024-10-17,2024-10-19,26.22730
+O3,2024-10-18,2024-10-19,22.3045
+NO2,2024-10-18,2024-10-19,20.8002
+O3,2024-10-18,2024-10-20,31.34.37874
+NO2,2024-10-18,2024-10-20,32.70553
+O3,2024-10-18,2024-10-20,31.54536
+NO2,2024-10-18,2024-10-20,38.55104
+O3,2024-10-19,2024-10-20,26.4217
+NO2,2024-10-19,2024-10-20,27.3999
+O3,2024-10-19,2024-10-20,17.0652
+NO2,2024-10-19,2024-10-20,18.9924
+O3,2024-10-19,2024-10-21,19.13648
+NO2,2024-10-19,2024-10-21,25.13566
+O3,2024-10-19,2024-10-21,24.20011
+NO2,2024-10-19,2024-10-21,20.09909
+O3,2024-10-20,2024-10-21,29.0094
+NO2,2024-10-20,2024-10-21,19.5074
+O3,2024-10-20,2024-10-21,20.0621
+NO2,2024-10-20,2024-10-21,23.6575
+O3,2024-10-20,2024-10-22,19.24714
+NO2,2024-10-20,2024-10-22,31.45461
+O3,2024-10-21,2024-10-22,16.52
+NO2,2024-10-21,2024-10-22,30.1924
+O3,2024-10-21,2024-10-23,31.01934
+NO2,2024-10-21,2024-10-23,19.74775
+O3,2024-10-21,2024-10-23,17.70241
+NO2,2024-10-21,2024-10-23,27.73936
+O3,2024-10-22,2024-10-24,25.96605
+NO2,2024-10-22,2024-10-24,31.90517
+O3,2024-10-22,2024-10-24,29.22139
+NO2,2024-10-22,2024-10-24,22.16907
+O3,2024-10-23,2024-10-24,9.0905
+NO2,2024-10-23,2024-10-24,18.1749
+O3,2024-10-23,2024-10-24,20.205
+NO2,2024-10-23,2024-10-24,27.3349
+O3,2024-10-24,2024-10-25,9.090494810363
+NO2,2024-10-24,2024-10-25,18.174884683860732
+O3,2024-10-24,2024-10-26,20.2050211414084
+NO2,2024-10-24,2024-10-26,27.334877510848962
+O3,2024-10-24,2024-10-27,16.31643675723712
+NO2,2024-10-24,2024-10-27,23.015396915310056

app.py CHANGED Viewed

@@ -185,3 +185,4 @@ with col2:
         ),
     )
     st.plotly_chart(fig_no2, key="fig_no2")

         ),
     )
     st.plotly_chart(fig_no2, key="fig_no2")

pages/admin.py CHANGED Viewed

@@ -1,63 +1,196 @@
 import numpy as np
 import pandas as pd
 import streamlit as st
-USERNAME = "admin"
 PASSWORD = "password"
 st.title("Admin Panel")
 # Login Form
-login_success = False
-with st.form("login_form"):
-    st.write("Please login to access the admin dashboard:")
-    username = st.text_input("Username")
-    password = st.text_input("Password", type="password")
-    login_button = st.form_submit_button("Login")
-    if login_button:
-        if username == USERNAME and password == PASSWORD:
-            login_success = True
-            st.success("Login successful!")
-        else:
-            st.error("Invalid username or password.")
-# After successful login
-if login_success:
-    # Display information about model performance
-    st.header("Model Performance Metrics")
-    model_r2_score = 0.85  # Mock R^2 Score
-    avg_prediction_time = 0.15  # Mock Average Prediction Time in seconds
-    num_predictions_made = 2000  # Mock Number of Predictions Made
-    st.metric(label="R² Score", value=f"{model_r2_score:.2f}")
-    st.metric(
-        label="Average Prediction Time", value=f"{avg_prediction_time:.2f} seconds"
     )
-    st.metric(label="Total Predictions Made", value=num_predictions_made)
-    st.subheader("Detailed Metrics")
-    detailed_metrics = pd.DataFrame(
-        {
-            "Metric": ["MAE", "MSE", "RMSE", "Training Time"],
-            "Value": [2.5, 3.4, 1.8, "1.2 hours"],
-        }
     )
-    st.table(detailed_metrics)
-    # Mocking prediction latency over time (example chart)
-    st.subheader("Prediction Latency Over Time")
-    latency_data = pd.DataFrame(
-        {
-            "Date": pd.date_range(end=pd.Timestamp.today(), periods=7).to_list(),
-            "Prediction Time (s)": np.random.uniform(0.1, 0.5, 7),
-        }
     )
-    st.line_chart(latency_data.set_index("Date"))
-    # Button to simulate refreshing metrics
-    if st.button("Refresh Metrics"):
-        st.experimental_rerun()
-else:
-    st.warning("Please login to access the admin panel.")

 import numpy as np
 import pandas as pd
+import plotly.graph_objects as go
 import streamlit as st
+from sklearn.metrics import mean_squared_error
+USERNAME = "dragonkiller"
 PASSWORD = "password"
 st.title("Admin Panel")
+# Use session state to remember login state
+if "login_success" not in st.session_state:
+    st.session_state.login_success = False
 # Login Form
+if not st.session_state.login_success:
+    with st.form("login_form"):
+        st.write("Please login to access the admin dashboard:")
+        username = st.text_input("Username")
+        password = st.text_input("Password", type="password")
+        login_button = st.form_submit_button("Login")
+        if login_button:
+            if username == USERNAME and password == PASSWORD:
+                st.session_state.login_success = True
+                st.success("Login successful!")
+            else:
+                st.error("Invalid username or password.")
+else:
+    # Actual data vs 1,2,3 days ahead predictions
+    actual_data = pd.read_csv("pollution_data.csv")
+    prediction_data = pd.read_csv("prediction_history.csv")
+    col1, col2 = st.columns(2)
+    with col1:
+        pollutant = st.radio("Select a pollutant", ("O3", "NO2"))
+    with col2:
+        days_ahead = st.radio("Select days ahead for prediction", (1, 2, 3))
+    predictions = prediction_data[prediction_data["pollutant"] == pollutant]
+    actual = actual_data[["date", pollutant]].rename(
+        columns={pollutant: "actual_value"}
     )
+    predictions_filtered = predictions[
+        predictions["date_predicted"]
+        == (
+            pd.to_datetime(predictions["date"]) - pd.Timedelta(days=days_ahead)
+        ).dt.strftime("%Y-%m-%d")
+    ]
+    fig = go.Figure()
+    fig.add_trace(
+        go.Scatter(
+            x=actual["date"],
+            y=actual["actual_value"],
+            mode="lines+markers",
+            name="Ground Truth",
+            line=dict(color="green", width=3),
+        )
     )
+    fig.add_trace(
+        go.Scatter(
+            x=predictions_filtered["date"],
+            y=predictions_filtered["prediction_value"],
+            mode="lines+markers",
+            name=f"Prediction {days_ahead} day(s) ahead",
+            line=dict(dash="dash", color="orange", width=3),
+        )
     )
+    fig.update_layout(
+        title=f"{pollutant} Predictions vs Actual Values",
+        xaxis_title="Date",
+        yaxis_title=f"{pollutant} Concentration",
+        legend=dict(x=0, y=1),
+        yaxis=dict(range=[0, 60]),
+        template="plotly_white",
+        xaxis=dict(
+            title="Date",
+            type="date",
+            tickmode="array",
+            tickvals=predictions["date"],
+            tickformat="%d-%b",
+            tickangle=-45,
+            tickcolor="gray",
+        ),
+    )
+    st.plotly_chart(fig)
+    # Evaluation Function
+    def evaluate_predictions_all_days(actual, predictions):
+        rmse_values_all = {"O3": [], "NO2": []}
+        smape_values_all = {"O3": [], "NO2": []}
+        for pollutant in ["O3", "NO2"]:
+            predictions_pollutant = predictions[predictions["pollutant"] == pollutant]
+            actual_pollutant = actual_data[["date", pollutant]].rename(
+                columns={pollutant: "actual_value"}
+            )
+            # Calculate RMSE and SMAPE for each day (1st, 2nd, and 3rd)
+            for i in range(1, 4):
+                predictions_filtered = predictions_pollutant[
+                    predictions_pollutant["date_predicted"]
+                    == (
+                        pd.to_datetime(predictions_pollutant["date"])
+                        - pd.Timedelta(days=i)
+                    ).dt.strftime("%Y-%m-%d")
+                ]
+                actual_filtered = actual_pollutant[
+                    actual_pollutant["date"].isin(predictions_filtered["date"])
+                ]
+                merged = pd.merge(
+                    actual_filtered,
+                    predictions_filtered,
+                    left_on="date",
+                    right_on="date",
+                )
+                if not merged.empty:
+                    actual_values = merged["actual_value"].values
+                    prediction_values = merged["prediction_value"].values
+                    rmse = np.sqrt(mean_squared_error(actual_values, prediction_values))
+                    rmse_values_all[pollutant].append(rmse)
+                    smape = (
+                        100
+                        / len(actual_values)
+                        * np.sum(
+                            2
+                            * np.abs(prediction_values - actual_values)
+                            / (np.abs(actual_values) + np.abs(prediction_values))
+                        )
+                    )
+                    smape_values_all[pollutant].append(smape)
+        # Plot RMSE and SMAPE for both pollutants
+        fig_rmse = go.Figure()
+        for day in range(3):
+            fig_rmse.add_trace(
+                go.Bar(
+                    x=["O3", "NO2"],
+                    y=[rmse_values_all["O3"][day], rmse_values_all["NO2"][day]],
+                    name=f"Day {day + 1}",
+                )
+            )
+        fig_rmse.update_layout(
+            title="RMSE for Predictions Over 3 Days",
+            yaxis_title="RMSE",
+            xaxis_title="Pollutant",
+            barmode="group",
+        )
+        st.plotly_chart(fig_rmse)
+        fig_smape = go.Figure()
+        for day in range(3):
+            fig_smape.add_trace(
+                go.Bar(
+                    x=["O3", "NO2"],
+                    y=[smape_values_all["O3"][day], smape_values_all["NO2"][day]],
+                    name=f"Day {day + 1}",
+                )
+            )
+        fig_smape.update_layout(
+            title="SMAPE for Predictions Over 3 Days",
+            yaxis_title="SMAPE (%)",
+            xaxis_title="Pollutant",
+            barmode="group",
+        )
+        st.plotly_chart(fig_smape)
+        # Calculate total current SMAPE and RMSE
+        total_O3_smape = sum(smape_values_all["O3"]) / len(smape_values_all)
+        total_NO2_smape = sum(smape_values_all["NO2"]) / len(smape_values_all)
+        total_O3_rmse = sum(rmse_values_all["O3"]) / len(rmse_values_all)
+        total_NO2_rmse = sum(rmse_values_all["NO2"]) / len(rmse_values_all)
+        # Display metrics table
+        metrics_data = {
+            "Metric": [
+                "Current NO2 SMAPE (%)",
+                "Current NO2 RMSE (µg/m3)",
+                "Current O3 SMAPE (%)",
+                "Current O3 RMSE (µg/m3)",
+            ],
+            "Value": [total_NO2_smape, total_NO2_rmse, total_O3_smape, total_O3_rmse],
+        }
+        metrics_df = pd.DataFrame(metrics_data)
+        st.table(metrics_df)
+    evaluate_predictions_all_days(actual_data, prediction_data)

prediction_history.csv ADDED Viewed

	@@ -0,0 +1,55 @@

+pollutant,date_predicted,date,prediction_value
+O3,2024-10-14,2024-10-17,31.253351852448926
+NO2,2024-10-14,2024-10-17,26.421736787446267
+O3,2024-10-15,2024-10-17,22.000057677604474
+NO2,2024-10-15,2024-10-17,28.59511317503212
+O3,2024-10-16,2024-10-17,9.657466070999735
+NO2,2024-10-16,2024-10-17,17.065168790519902
+O3,2024-10-15,2024-10-18,6.561248
+NO2,2024-10-15,2024-10-18,26.443672
+O3,2024-10-16,2024-10-18,19.782418
+NO2,2024-10-16,2024-10-18,36.453956
+O3,2024-10-17,2024-10-18,16.08841798553393
+NO2,2024-10-17,2024-10-18,32.0458143607889
+O3,2024-10-16,2024-10-19,24.031357603260783
+NO2,2024-10-16,2024-10-19,20.083893955587914
+O3,2024-10-17,2024-10-19,21.031357603260783
+NO2,2024-10-17,2024-10-19,27.083893955587914
+O3,2024-10-17,2024-10-20,20.484862479793236
+NO2,2024-10-17,2024-10-20,23.84300578029378
+O3,2024-10-18,2024-10-19,22.304547122637445
+NO2,2024-10-18,2024-10-19,20.80017116560889
+O3,2024-10-18,2024-10-20,31.253351852448926
+NO2,2024-10-18,2024-10-20,29.732316066240582
+O3,2024-10-18,2024-10-21,28.67755196805434
+NO2,2024-10-18,2024-10-21,35.04638743773354
+O3,2024-10-19,2024-10-20,26.421736787446267
+NO2,2024-10-19,2024-10-20,27.399885723190767
+O3,2024-10-19,2024-10-21,17.065168790519902
+NO2,2024-10-19,2024-10-21,18.992352714813563
+O3,2024-10-19,2024-10-22,17.39682962048955
+NO2,2024-10-19,2024-10-22,22.850616758859076
+O3,2024-10-20,2024-10-21,22.000057677604474
+NO2,2024-10-20,2024-10-21,18.27191592927812
+O3,2024-10-20,2024-10-22,29.00940466937953
+NO2,2024-10-20,2024-10-22,19.507397669634972
+O3,2024-10-20,2024-10-23,20.062134354543346
+NO2,2024-10-20,2024-10-23,23.657466070999735
+O3,2024-10-21,2024-10-22,17.497382318189132
+NO2,2024-10-21,2024-10-22,28.59511317503212
+O3,2024-10-21,2024-10-23,16.519952190354232
+NO2,2024-10-21,2024-10-23,30.192389708351822
+O3,2024-10-21,2024-10-24,28.199403851129034
+NO2,2024-10-21,2024-10-24,17.9525039623211
+O3,2024-10-22,2024-10-23,16.093074246425157
+NO2,2024-10-22,2024-10-23,25.217639978187005
+O3,2024-10-22,2024-10-24,23.605545201596552
+NO2,2024-10-22,2024-10-24,29.004701753536988
+O3,2024-10-23,2024-10-24,26.56486295059828
+NO2,2024-10-23,2024-10-24,20.153737337472574
+O3,2024-10-24,2024-10-25,9.090494810363
+NO2,2024-10-24,2024-10-25,18.174884683860732
+O3,2024-10-24,2024-10-26,20.2050211414084
+NO2,2024-10-24,2024-10-26,27.334877510848962
+O3,2024-10-24,2024-10-27,16.31643675723712
+NO2,2024-10-24,2024-10-27,23.015396915310056

predictions_history.csv CHANGED Viewed

@@ -5,9 +5,3 @@ O3,2024-10-24,2024-10-26,16.000984317626852
 NO2,2024-10-24,2024-10-26,25.760307451092384
 O3,2024-10-24,2024-10-27,19.64377495640328
 NO2,2024-10-24,2024-10-27,31.210576791105115
-O3,2024-10-24,2024-10-25,10.33808859423279
-NO2,2024-10-24,2024-10-25,25.68519991558237
-O3,2024-10-24,2024-10-26,16.000984317626852
-NO2,2024-10-24,2024-10-26,25.760307451092384
-O3,2024-10-24,2024-10-27,19.64377495640328
-NO2,2024-10-24,2024-10-27,31.210576791105115

 NO2,2024-10-24,2024-10-26,25.760307451092384
 O3,2024-10-24,2024-10-27,19.64377495640328
 NO2,2024-10-24,2024-10-27,31.210576791105115

src/helper_functions.py CHANGED Viewed

@@ -42,3 +42,4 @@ def pollution_box(label, value, delta):
             <p style="color: {'green' if '+' in delta else 'orange'}; margin: 0;">{delta}</p>
         </div>
         """, unsafe_allow_html=True)

             <p style="color: {'green' if '+' in delta else 'orange'}; margin: 0;">{delta}</p>
         </div>
         """, unsafe_allow_html=True)

src/predict.py CHANGED Viewed

@@ -47,7 +47,7 @@ def get_data_and_predictions():
         prediction_data.append(
             {
                 "pollutant": "O3",
-                "date_predicted": date.today(),
                 "date": date.today() + timedelta(days=i + 1),
                 "prediction_value": o3_predictions[0][i],
             }
@@ -55,7 +55,7 @@ def get_data_and_predictions():
         prediction_data.append(
             {
                 "pollutant": "NO2",
-                "date_predicted": date.today(),
                 "date": date.today() + timedelta(days=i + 1),
                 "prediction_value": no2_predictions[0][i],
             }
@@ -65,10 +65,11 @@ def get_data_and_predictions():
     if os.path.exists(PREDICTIONS_FILE):
         existing_data = pd.read_csv(PREDICTIONS_FILE)
         combined_data = pd.concat([existing_data, predictions_df])
-        combined_data = combined_data.drop_duplicates(
-            subset=["pollutant", "date_predicted", "date"], keep="first"
-        )
     else:
         combined_data = predictions_df

         prediction_data.append(
             {
                 "pollutant": "O3",
+                "predicted_on": date.today(),
                 "date": date.today() + timedelta(days=i + 1),
                 "prediction_value": o3_predictions[0][i],
             }
         prediction_data.append(
             {
                 "pollutant": "NO2",
+                "predicted_on": date.today(),
                 "date": date.today() + timedelta(days=i + 1),
                 "prediction_value": no2_predictions[0][i],
             }
     if os.path.exists(PREDICTIONS_FILE):
         existing_data = pd.read_csv(PREDICTIONS_FILE)
+        # Filter out predictions made today to avoid duplicates
+        existing_data = existing_data[
+            ~(existing_data["predicted_on"] == str(date.today()))
+        ]
         combined_data = pd.concat([existing_data, predictions_df])
     else:
         combined_data = predictions_df

test.ipynb CHANGED Viewed

@@ -267,6 +267,40 @@
    "source": [
     "predictions_NO2"
    ]
   }
  ],
  "metadata": {

    "source": [
     "predictions_NO2"
    ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from src.data_api_calls import get_combined_data"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "ename": "TypeError",
+     "evalue": "'<' not supported between instances of 'Timestamp' and 'str'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[1;31mTypeError\u001b[0m                                 Traceback (most recent call last)",
+      "Cell \u001b[1;32mIn[2], line 1\u001b[0m\n\u001b[1;32m----> 1\u001b[0m \u001b[43mget_combined_data\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m2024-10-10\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n",
+      "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\data_api_calls.py:136\u001b[0m, in \u001b[0;36mget_combined_data\u001b[1;34m(input_date)\u001b[0m\n\u001b[0;32m    133\u001b[0m     start_date \u001b[38;5;241m=\u001b[39m end_date \u001b[38;5;241m-\u001b[39m timedelta(\u001b[38;5;241m7\u001b[39m)\n\u001b[0;32m    135\u001b[0m update_weather_data(start_date, end_date)\n\u001b[1;32m--> 136\u001b[0m \u001b[43mupdate_pollution_data\u001b[49m\u001b[43m(\u001b[49m\u001b[43mstart_date\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mend_date\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m    138\u001b[0m weather_df \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mread_csv(WEATHER_DATA_FILE)\n\u001b[0;32m    140\u001b[0m weather_df\u001b[38;5;241m.\u001b[39minsert(\u001b[38;5;241m1\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mNO2\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;28;01mNone\u001b[39;00m)\n",
+      "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\src\\data_api_calls.py:123\u001b[0m, in \u001b[0;36mupdate_pollution_data\u001b[1;34m(start_date, end_date)\u001b[0m\n\u001b[0;32m    121\u001b[0m updated_data \u001b[38;5;241m=\u001b[39m pd\u001b[38;5;241m.\u001b[39mconcat([existing_data, new_data], ignore_index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[0;32m    122\u001b[0m updated_data\u001b[38;5;241m.\u001b[39mdrop_duplicates(subset\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mdate\u001b[39m\u001b[38;5;124m\"\u001b[39m, keep\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mlast\u001b[39m\u001b[38;5;124m\"\u001b[39m, inplace\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m--> 123\u001b[0m updated_data \u001b[38;5;241m=\u001b[39m \u001b[43mupdated_data\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msort_values\u001b[49m\u001b[43m(\u001b[49m\u001b[43mby\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mdate\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m    124\u001b[0m updated_data\u001b[38;5;241m.\u001b[39mto_csv(POLLUTION_DATA_FILE, index\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mFalse\u001b[39;00m)\n",
+      "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\frame.py:7200\u001b[0m, in \u001b[0;36mDataFrame.sort_values\u001b[1;34m(self, by, axis, ascending, inplace, kind, na_position, ignore_index, key)\u001b[0m\n\u001b[0;32m   7197\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(ascending, (\u001b[38;5;28mtuple\u001b[39m, \u001b[38;5;28mlist\u001b[39m)):\n\u001b[0;32m   7198\u001b[0m         ascending \u001b[38;5;241m=\u001b[39m ascending[\u001b[38;5;241m0\u001b[39m]\n\u001b[1;32m-> 7200\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m \u001b[43mnargsort\u001b[49m\u001b[43m(\u001b[49m\n\u001b[0;32m   7201\u001b[0m \u001b[43m        \u001b[49m\u001b[43mk\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkind\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkind\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mascending\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mascending\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mna_position\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mna_position\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mkey\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkey\u001b[49m\n\u001b[0;32m   7202\u001b[0m \u001b[43m    \u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m   7203\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m   7204\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m inplace:\n",
+      "File \u001b[1;32mc:\\Users\\elikl\\Documents\\Uni\\yr3\\ML for industry\\utrecht-pollution-prediction\\.venv\\Lib\\site-packages\\pandas\\core\\sorting.py:439\u001b[0m, in \u001b[0;36mnargsort\u001b[1;34m(items, kind, ascending, na_position, key, mask)\u001b[0m\n\u001b[0;32m    437\u001b[0m     non_nans \u001b[38;5;241m=\u001b[39m non_nans[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[0;32m    438\u001b[0m     non_nan_idx \u001b[38;5;241m=\u001b[39m non_nan_idx[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n\u001b[1;32m--> 439\u001b[0m indexer \u001b[38;5;241m=\u001b[39m non_nan_idx[\u001b[43mnon_nans\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43margsort\u001b[49m\u001b[43m(\u001b[49m\u001b[43mkind\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mkind\u001b[49m\u001b[43m)\u001b[49m]\n\u001b[0;32m    440\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m ascending:\n\u001b[0;32m    441\u001b[0m     indexer \u001b[38;5;241m=\u001b[39m indexer[::\u001b[38;5;241m-\u001b[39m\u001b[38;5;241m1\u001b[39m]\n",
+      "\u001b[1;31mTypeError\u001b[0m: '<' not supported between instances of 'Timestamp' and 'str'"
+     ]
+    }
+   ],
+   "source": [
+    "get_combined_data(\"2024-10-10\")"
+   ]
   }
  ],
  "metadata": {

weather_data.csv CHANGED Viewed

@@ -1,4 +1,18 @@
 date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
 2024-10-17,16.9,86.0,0.6,18.4,1010.0,37.1,43.0
 2024-10-18,15.5,97.3,3.9,7.6,1014.0,4.5,42.9
 2024-10-19,14.7,89.9,1.6,14.8,1014.1,22.8,43.5

 date,temp,humidity,precip,windspeed,sealevelpressure,visibility,solarradiation
+2024-10-03,9.9,78.9,0.0,25.9,1020.0,44.0,81.8
+2024-10-04,7.7,86.2,0.0,11.2,1022.0,25.0,86.3
+2024-10-05,8.0,83.2,0.0,14.8,1019.0,26.0,76.5
+2024-10-06,10.0,82.7,0.0,25.9,1007.7,36.5,77.2
+2024-10-07,15.3,81.0,0.4,22.3,1000.8,37.5,65.6
+2024-10-08,16.0,83.4,0.6,18.4,997.4,40.0,55.6
+2024-10-09,14.2,88.3,1.0,22.3,990.1,37.2,38.2
+2024-10-10,12.3,83.6,1.0,18.4,997.3,28.2,52.5
+2024-10-11,8.5,84.4,1.0,14.8,1015.3,36.1,62.9
+2024-10-12,7.8,89.4,0.0,21.6,1011.3,25.5,49.3
+2024-10-13,10.5,75.8,3.7,18.4,1013.2,36.8,49.9
+2024-10-14,7.9,89.8,0.0,10.8,1018.3,24.3,44.1
+2024-10-15,8.7,86.9,0.0,18.4,1019.4,29.0,71.2
+2024-10-16,15.1,82.2,0.0,22.3,1010.3,35.8,40.8
 2024-10-17,16.9,86.0,0.6,18.4,1010.0,37.1,43.0
 2024-10-18,15.5,97.3,3.9,7.6,1014.0,4.5,42.9
 2024-10-19,14.7,89.9,1.6,14.8,1014.1,22.8,43.5