import os
from datetime import date, timedelta

import joblib
import pandas as pd
from dotenv import load_dotenv
from huggingface_hub import hf_hub_download, login

from src.data_api_calls import get_combined_data
from src.features_pipeline import create_features


def load_model(particle):
    """Download and deserialize the SVR model for the given particle from the Hugging Face Hub."""
    load_dotenv()
    login(token=os.getenv("HUGGINGFACE_DOWNLOAD_TOKEN"))

    repo_id = f"elisaklunder/Utrecht-{particle}-Forecasting-Model"
    if particle == "O3":
        file_name = "O3_svr_model.pkl"
    elif particle == "NO2":
        file_name = "NO2_svr_model.pkl"
    else:
        # Guard against an undefined file_name (NameError in the original).
        raise ValueError(f"Unsupported particle {particle!r}; expected 'O3' or 'NO2'.")

    model_path = hf_hub_download(repo_id=repo_id, filename=file_name)
    return joblib.load(model_path)


def run_model(particle, data):
    """Build the feature matrix, predict, and map the result back to concentration units."""
    input_data = create_features(data=data, target_particle=particle)
    model = load_model(particle)
    prediction = model.predict(input_data)

    # The model predicts in scaled target space; invert with the scaler
    # that was fitted during training.
    target_scaler = joblib.load(f"scalers/target_scaler_{particle}.joblib")
    prediction = target_scaler.inverse_transform(prediction)
    return prediction


def get_data_and_predictions():
    """Fetch the latest combined data, forecast O3 and NO2 for the next three
    days, and append the forecasts to the on-disk prediction history."""
    PREDICTIONS_FILE = "predictions_history.csv"

    week_data = get_combined_data()
    o3_predictions = run_model("O3", data=week_data)
    no2_predictions = run_model("NO2", data=week_data)

    # Predictions are indexed as [0][i]: a single row with one column per
    # forecast day. Record one row per pollutant per horizon (+1 to +3 days).
    prediction_data = []
    for i in range(3):
        prediction_data.append(
            {
                "pollutant": "O3",
                "date_predicted": date.today(),
                "date": date.today() + timedelta(days=i + 1),
                "prediction_value": o3_predictions[0][i],
            }
        )
        prediction_data.append(
            {
                "pollutant": "NO2",
                "date_predicted": date.today(),
                "date": date.today() + timedelta(days=i + 1),
                "prediction_value": no2_predictions[0][i],
            }
        )

    predictions_df = pd.DataFrame(prediction_data)

    # Merge with any existing history, keeping the first prediction made for
    # each (pollutant, date_predicted, date) combination.
    if os.path.exists(PREDICTIONS_FILE):
        existing_data = pd.read_csv(PREDICTIONS_FILE)
        combined_data = pd.concat([existing_data, predictions_df])
        combined_data = combined_data.drop_duplicates(
            subset=["pollutant", "date_predicted", "date"], keep="first"
        )
    else:
        combined_data = predictions_df

    combined_data.to_csv(PREDICTIONS_FILE, index=False)
    return week_data, o3_predictions, no2_predictions
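

# Optional entry point: a minimal usage sketch, not part of the original
# module. It assumes HUGGINGFACE_DOWNLOAD_TOKEN is set in the environment
# (or in a .env file), the scalers/ directory exists alongside this script,
# and the src.* modules are importable; the output formatting below is
# illustrative only.
if __name__ == "__main__":
    week_data, o3_preds, no2_preds = get_data_and_predictions()
    print(f"Fetched {len(week_data)} rows of input data")
    print(f"O3 forecast (days +1..+3): {o3_preds[0][:3]}")
    print(f"NO2 forecast (days +1..+3): {no2_preds[0][:3]}")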