"""Streamlit admin panel.

After a simple username/password login the page shows:

* the combined data table returned by ``get_combined_data`` together with a
  report of missing values,
* a chart of actual pollutant values against the predictions made 1, 2 or 3
  days ahead, read from ``pollution_data.csv`` and ``predictions_history.csv``,
* RMSE / SMAPE bar charts and a summary table for both pollutants.
"""

import hmac
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import streamlit as st
from sklearn.metrics import mean_squared_error

from src.data_api_calls import get_combined_data

# SECURITY: the fallback values are the historical hard-coded credentials and
# are not safe for a real deployment. Set ADMIN_USERNAME / ADMIN_PASSWORD in
# the environment to override them.
USERNAME = os.environ.get("ADMIN_USERNAME", "admin")
PASSWORD = os.environ.get("ADMIN_PASSWORD", "password")

_POLLUTANTS = ("O3", "NO2")
_HORIZONS_DAYS = (1, 2, 3)


def _credentials_match(username: str, password: str) -> bool:
    """Return True when both values equal the configured credentials.

    Both comparisons are always evaluated and use ``hmac.compare_digest`` so
    the check does not short-circuit on the first mismatching character.
    """
    user_ok = hmac.compare_digest(username.encode("utf-8"), USERNAME.encode("utf-8"))
    pass_ok = hmac.compare_digest(password.encode("utf-8"), PASSWORD.encode("utf-8"))
    return user_ok and pass_ok


def _filter_by_horizon(predictions: pd.DataFrame, days_ahead: int) -> pd.DataFrame:
    """Keep the rows whose ``date_predicted`` is ``days_ahead`` days before ``date``.

    ``date_predicted`` is compared as a ``%Y-%m-%d`` string, exactly as it is
    stored in the predictions frame.
    """
    issued_on = (
        pd.to_datetime(predictions["date"]) - pd.Timedelta(days=days_ahead)
    ).dt.strftime("%Y-%m-%d")
    return predictions[predictions["date_predicted"] == issued_on]


def _smape(actual_values, prediction_values) -> float:
    """Return the symmetric mean absolute percentage error, in percent.

    Pairs where actual and prediction are both zero contribute 0 instead of
    the NaN that a plain 0/0 division would produce.
    """
    actual_values = np.asarray(actual_values, dtype=float)
    prediction_values = np.asarray(prediction_values, dtype=float)
    numerator = 2 * np.abs(prediction_values - actual_values)
    denominator = np.abs(actual_values) + np.abs(prediction_values)
    terms = np.divide(
        numerator,
        denominator,
        out=np.zeros_like(numerator),
        where=denominator != 0,
    )
    return float(100 * terms.mean())


def _mean_of_finite(values) -> float:
    """Return the mean of the finite entries of ``values``, or NaN if none."""
    finite = [value for value in values if np.isfinite(value)]
    return float(np.mean(finite)) if finite else float("nan")


def _grouped_bar_chart(values_by_pollutant: dict, title: str, yaxis_title: str) -> go.Figure:
    """Build a grouped bar chart with one bar group per pollutant, one bar per day."""
    fig = go.Figure()
    for index, _ in enumerate(_HORIZONS_DAYS):
        fig.add_trace(
            go.Bar(
                x=["O3", "NO2"],
                y=[values_by_pollutant["O3"][index], values_by_pollutant["NO2"][index]],
                name=f"Day {index + 1}",
            )
        )
    fig.update_layout(
        title=title,
        yaxis_title=yaxis_title,
        xaxis_title="Pollutant",
        barmode="group",
    )
    return fig


def evaluate_predictions_all_days(actual, predictions):
    """Render RMSE and SMAPE of the 1-, 2- and 3-day-ahead predictions.

    Args:
        actual: DataFrame with a ``date`` column and one column per pollutant
            (``O3``, ``NO2``) holding the observed values.
        predictions: DataFrame with ``pollutant``, ``date``, ``date_predicted``
            and ``prediction_value`` columns.

    Returns:
        None. Two bar charts and a metrics table are written to the page.
    """
    rmse_values_all = {pollutant: [] for pollutant in _POLLUTANTS}
    smape_values_all = {pollutant: [] for pollutant in _POLLUTANTS}

    for pollutant in _POLLUTANTS:
        predictions_pollutant = predictions[predictions["pollutant"] == pollutant]
        # Use the ``actual`` argument (the previous code read a global instead).
        actual_pollutant = actual[["date", pollutant]].rename(
            columns={pollutant: "actual_value"}
        )

        for days_ahead in _HORIZONS_DAYS:
            # The inner merge keeps only dates present on both sides; rows with
            # a missing value on either side cannot be scored and are dropped.
            merged = pd.merge(
                actual_pollutant,
                _filter_by_horizon(predictions_pollutant, days_ahead),
                on="date",
            ).dropna(subset=["actual_value", "prediction_value"])

            if merged.empty:
                # Keep one entry per horizon so list positions stay aligned
                # with the day index used when plotting.
                rmse = smape = float("nan")
            else:
                actual_values = merged["actual_value"].values
                prediction_values = merged["prediction_value"].values
                rmse = float(
                    np.sqrt(mean_squared_error(actual_values, prediction_values))
                )
                smape = _smape(actual_values, prediction_values)

            rmse_values_all[pollutant].append(rmse)
            smape_values_all[pollutant].append(smape)

    # Plot RMSE and SMAPE for both pollutants
    st.plotly_chart(
        _grouped_bar_chart(rmse_values_all, "RMSE for Predictions Over 3 Days", "RMSE")
    )
    st.plotly_chart(
        _grouped_bar_chart(
            smape_values_all, "SMAPE for Predictions Over 3 Days", "SMAPE (%)"
        )
    )

    # Average each metric over the horizons that could be evaluated. The
    # divisor is the number of horizons, not the number of pollutants.
    total_O3_smape = _mean_of_finite(smape_values_all["O3"])
    total_NO2_smape = _mean_of_finite(smape_values_all["NO2"])
    total_O3_rmse = _mean_of_finite(rmse_values_all["O3"])
    total_NO2_rmse = _mean_of_finite(rmse_values_all["NO2"])

    # Display metrics table
    metrics_data = {
        "Metric": [
            "Current NO2 SMAPE (%)",
            "Current NO2 RMSE (µg/m3)",
            "Current O3 SMAPE (%)",
            "Current O3 RMSE (µg/m3)",
        ],
        "Value": [total_NO2_smape, total_NO2_rmse, total_O3_smape, total_O3_rmse],
    }
    st.table(pd.DataFrame(metrics_data))


st.title("Admin Panel")

# Use session state to remember login state
if "login_success" not in st.session_state:
    st.session_state.login_success = False

if not st.session_state.login_success:
    # Login form. The dashboard branch below is only reached on a later script
    # run, once ``login_success`` is already True when this check is evaluated.
    with st.form("login_form"):
        st.write("Please login to access the admin dashboard:")
        username = st.text_input("Username")
        password = st.text_input("Password", type="password")
        login_button = st.form_submit_button("Login")

    if login_button:
        if _credentials_match(username, password):
            st.session_state.login_success = True
            st.success("Login successful!")
        else:
            st.error("Invalid username or password.")
else:
    # Fetch the combined data and locate missing values
    table_data = get_combined_data()
    missing_values = table_data.isnull()

    st.subheader("Data used for the prediction")

    if missing_values.values.any():
        # NOTE(review): only the plain text of this message is visible here;
        # any HTML wrapper implied by unsafe_allow_html is not shown - confirm.
        st.markdown(
            "\n\nWarning: Some data is missing!\n\n",
            unsafe_allow_html=True,
        )

        # Columns and rows (identified by their "Date" value) with gaps
        missing_columns = table_data.columns[missing_values.any()].tolist()
        missing_rows = table_data[missing_values.any(axis=1)]["Date"].tolist()

        # str() guards against non-string labels/dates (e.g. Timestamp, NaN),
        # which would make str.join raise TypeError.
        if missing_columns:
            st.markdown(
                f"**Columns with missing data:** {', '.join(map(str, missing_columns))}"
            )
        if missing_rows:
            st.markdown(
                f"**Rows with missing data (dates):** {', '.join(map(str, missing_rows))}"
            )
    else:
        st.markdown(
            "\n\nAll data is complete!\n\n",
            unsafe_allow_html=True,
        )

    st.dataframe(table_data)

    # Actual data vs 1,2,3 days ahead predictions
    actual_data = pd.read_csv("pollution_data.csv")
    prediction_data = pd.read_csv("predictions_history.csv")

    col1, col2 = st.columns(2)
    with col1:
        pollutant = st.radio("Select a pollutant", ("O3", "NO2"))
    with col2:
        days_ahead = st.radio("Select days ahead for prediction", (1, 2, 3))

    predictions = prediction_data[prediction_data["pollutant"] == pollutant]
    actual = actual_data[["date", pollutant]].rename(
        columns={pollutant: "actual_value"}
    )
    predictions_filtered = _filter_by_horizon(predictions, days_ahead)

    fig = go.Figure()
    fig.add_trace(
        go.Scatter(
            x=actual["date"],
            y=actual["actual_value"],
            mode="lines+markers",
            name="Ground Truth",
            line=dict(color="green", width=3),
        )
    )
    fig.add_trace(
        go.Scatter(
            x=predictions_filtered["date"],
            y=predictions_filtered["prediction_value"],
            mode="lines+markers",
            name=f"Prediction {days_ahead} day(s) ahead",
            line=dict(dash="dash", color="orange", width=3),
        )
    )
    fig.update_layout(
        title=f"{pollutant} Predictions vs Actual Values",
        xaxis_title="Date",
        yaxis_title=f"{pollutant} Concentration",
        legend=dict(x=0, y=1),
        yaxis=dict(range=[0, 60]),
        template="plotly_white",
        xaxis=dict(
            title="Date",
            type="date",
            tickmode="array",
            tickvals=predictions["date"],
            tickformat="%d-%b",
            tickangle=-45,
            tickcolor="gray",
        ),
    )
    st.plotly_chart(fig)

    evaluate_predictions_all_days(actual_data, prediction_data)