"""Connector for loading, training, and serving the click-prediction model.

The model artifact and the training dataset are fetched from Hugging Face Hub
repositories; trained models are saved locally and uploaded back to the Hub.
"""

import logging
import os

import joblib
import pandas as pd
from huggingface_hub import HfApi, hf_hub_download

from model_trainer import train_models  # Assumes model_trainer.py exists with train_models function

logger = logging.getLogger(__name__)

# Hugging Face Model & Dataset Information
MODEL_REPO = "taimax13/is_click_predictor"
MODEL_FILENAME = "rf_model.pkl"
DATA_REPO = "taimax13/is_click_data"
LOCAL_MODEL_PATH = f"models/{MODEL_FILENAME}"

# File in DATA_REPO holding the training set, and its label column.
_TRAIN_DATA_FILENAME = "train_dataset_full.csv"
_TARGET_COLUMN = "is_click"

# Hugging Face API
api = HfApi()


class ModelConnector:
    """Load, train, retrain, and query the model stored in ``MODEL_REPO``."""

    def __init__(self):
        """Create the local ``models`` directory and try to load the model.

        ``self.model`` is ``None`` when the model could not be downloaded.
        """
        os.makedirs("models", exist_ok=True)
        self.model = self.load_model()

    # ------------------------------------------------------------------
    # Private helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _download_model():
        """Return the local path of the downloaded model file, or ``None``.

        Any failure (missing file or repo, auth error, network error) is
        treated as "model not available"; the cause is logged rather than
        silently discarded.
        """
        try:
            return hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
        except Exception as exc:  # best-effort probe: never raise from here
            logger.warning(
                "Could not download %s from %s: %s", MODEL_FILENAME, MODEL_REPO, exc
            )
            return None

    @staticmethod
    def _load_training_data():
        """Download the training CSV and split it into features and target.

        Returns:
            A ``(X_train, y_train)`` tuple where ``y_train`` is the
            ``is_click`` column and ``X_train`` is every other column.
        """
        train_data_path = hf_hub_download(
            repo_id=DATA_REPO, filename=_TRAIN_DATA_FILENAME
        )
        train_data = pd.read_csv(train_data_path)
        X_train = train_data.drop(columns=[_TARGET_COLUMN])
        y_train = train_data[_TARGET_COLUMN]
        return X_train, y_train

    @staticmethod
    def _save_and_upload(model):
        """Persist *model* to ``LOCAL_MODEL_PATH`` and upload it to the Hub."""
        # Do not rely on __init__ having run in this working directory.
        os.makedirs(os.path.dirname(LOCAL_MODEL_PATH), exist_ok=True)
        joblib.dump(model, LOCAL_MODEL_PATH)
        api.upload_file(
            path_or_fileobj=LOCAL_MODEL_PATH,
            path_in_repo=MODEL_FILENAME,
            repo_id=MODEL_REPO,
        )

    # ------------------------------------------------------------------
    # Public API
    # ------------------------------------------------------------------
    def check_model_exists(self):
        """Check if the model exists on Hugging Face.

        The check is performed by attempting to download the model file, so a
        ``True`` result also means the file has been fetched locally.

        Returns:
            ``True`` if the download succeeded, ``False`` on any error.
        """
        return self._download_model() is not None

    def load_model(self):
        """Download and load the model from Hugging Face.

        Returns:
            The deserialized model, or ``None`` if it could not be downloaded.

        Raises:
            Whatever ``joblib.load`` raises if the downloaded file cannot be
            deserialized.
        """
        # Single download attempt (previously the file was requested twice:
        # once to probe for existence and once more to load it).
        model_path = self._download_model()
        if model_path is None:
            return None
        # NOTE(security): joblib.load unpickles the file and can execute
        # arbitrary code; only load artifacts from a repository you trust.
        return joblib.load(model_path)

    def train_model(self):
        """Train a new model and upload it to Hugging Face.

        Trains via ``train_models`` and keeps the entry stored under the
        ``"RandomForest"`` key of its result. ``self.model`` is replaced only
        after the save and upload have succeeded.

        Returns:
            A human-readable status string; errors are reported in the string
            rather than raised.
        """
        try:
            # Load dataset
            X_train, y_train = self._load_training_data()

            # Train model
            models = train_models(X_train, y_train)
            rf_model = models["RandomForest"]

            # Save locally and upload to Hugging Face
            self._save_and_upload(rf_model)

            self.model = rf_model  # Update instance with trained model
            return "Model trained and uploaded successfully!"
        except Exception as e:
            return f"Error during training: {str(e)}"

    def retrain_model(self):
        """Retrain the existing model with new data.

        Refits ``self.model`` in place on the training dataset, then saves and
        uploads it.

        Returns:
            A human-readable status string; errors are reported in the string
            rather than raised.
        """
        # Guard first: no point downloading the dataset when there is nothing
        # to retrain.
        if self.model is None:
            return "No existing model found. Train a new model first."

        try:
            # Load dataset
            X_train, y_train = self._load_training_data()

            # Retrain the model
            self.model.fit(X_train, y_train)

            # Save & upload retrained model
            self._save_and_upload(self.model)
            return "Model retrained and uploaded successfully!"
        except Exception as e:
            return f"Error during retraining: {str(e)}"

    def predict(self, input_data):
        """Make predictions using the loaded model.

        Args:
            input_data: A single record, wrapped in a one-row ``DataFrame``
                before prediction (presumably a mapping of feature name to
                value -- confirm against callers).

        Returns:
            The prediction for that record as an ``int``, or an explanatory
            string when no model is loaded.
        """
        if self.model is None:
            return "No model found. Train the model first."

        input_df = pd.DataFrame([input_data])
        prediction = self.model.predict(input_df)[0]
        return int(prediction)