# File: is_click_predictor/Application/modelConnector.py
# Hugging Face page header (not code): KaiquanMah's picture,
# commit message "Upload 39 files", commit 2752969 (verified).
import logging
import os

import joblib
import pandas as pd
from huggingface_hub import hf_hub_download, HfApi

from model_trainer import train_models # Assumes model_trainer.py exists with train_models function
# Hugging Face Model & Dataset Information
# MODEL_REPO / MODEL_FILENAME identify the serialized model file that is
# downloaded from, and uploaded back to, the Hugging Face Hub.
MODEL_REPO = "taimax13/is_click_predictor"
MODEL_FILENAME = "rf_model.pkl"
# Hub repo from which the training CSV is downloaded.
DATA_REPO = "taimax13/is_click_data"
# Local path the model is dumped to before being uploaded (relative to the
# current working directory).
LOCAL_MODEL_PATH = f"models/{MODEL_FILENAME}"
# Hugging Face API
# Shared client used for uploads; created once at import time.
api = HfApi()
class ModelConnector:
    """Load, train, retrain and query the click-prediction model.

    The model file lives in ``MODEL_REPO`` on the Hugging Face Hub. Training
    and retraining fetch a CSV from ``DATA_REPO``, fit a model, write it to
    ``LOCAL_MODEL_PATH`` and upload that file back to ``MODEL_REPO``.

    ``train_model`` and ``retrain_model`` report their outcome as a
    human-readable string (they never raise); ``predict`` returns an ``int``
    prediction, or a message string when no model is loaded.
    """

    # File inside DATA_REPO that holds the training rows, and its label column.
    TRAIN_DATA_FILENAME = "train_dataset_full.csv"
    TARGET_COLUMN = "is_click"

    _log = logging.getLogger(__name__)

    def __init__(self):
        """Create the local ``models`` directory and try to load the model."""
        os.makedirs("models", exist_ok=True)
        # None when the model could not be fetched from the Hub.
        self.model = self.load_model()

    def check_model_exists(self) -> bool:
        """Return True if the model file can be fetched from ``MODEL_REPO``.

        The check is performed by actually downloading the file through
        ``hf_hub_download``; any failure is reported as ``False``.
        """
        try:
            hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
        except Exception as exc:
            # Keep the boolean contract, but do not hide *why* it failed.
            self._log.warning(
                "Model %s/%s is not available: %s", MODEL_REPO, MODEL_FILENAME, exc
            )
            return False
        return True

    def load_model(self):
        """Download the model file from ``MODEL_REPO`` and deserialize it.

        Returns:
            The object stored in the model file, or ``None`` when the file
            cannot be downloaded. Errors raised while deserializing a
            successfully downloaded file propagate to the caller.
        """
        # A single download attempt doubles as the existence check, so the
        # Hub is not contacted twice for the same file.
        try:
            model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
        except Exception as exc:
            self._log.warning(
                "Could not download %s/%s: %s", MODEL_REPO, MODEL_FILENAME, exc
            )
            return None
        # SECURITY NOTE: joblib.load unpickles the file and can execute
        # arbitrary code; MODEL_REPO must remain a trusted repository.
        return joblib.load(model_path)

    def _load_training_data(self):
        """Download the training CSV and split it into (features, target)."""
        csv_path = hf_hub_download(
            repo_id=DATA_REPO, filename=self.TRAIN_DATA_FILENAME
        )
        frame = pd.read_csv(csv_path)
        return frame.drop(columns=[self.TARGET_COLUMN]), frame[self.TARGET_COLUMN]

    def _save_and_upload(self, model) -> None:
        """Write *model* to ``LOCAL_MODEL_PATH`` and upload it to ``MODEL_REPO``."""
        # Do not rely on __init__ having run in the same working directory.
        os.makedirs(os.path.dirname(LOCAL_MODEL_PATH) or ".", exist_ok=True)
        joblib.dump(model, LOCAL_MODEL_PATH)
        api.upload_file(
            path_or_fileobj=LOCAL_MODEL_PATH,
            path_in_repo=MODEL_FILENAME,
            repo_id=MODEL_REPO,
        )

    def train_model(self) -> str:
        """Train a new model and upload it to Hugging Face.

        Uses the ``"RandomForest"`` entry of the mapping returned by
        ``train_models``. ``self.model`` is replaced only after the upload
        succeeded.

        Returns:
            A success message, or ``"Error during training: ..."`` if any
            step raised.
        """
        try:
            X_train, y_train = self._load_training_data()
            rf_model = train_models(X_train, y_train)["RandomForest"]
            self._save_and_upload(rf_model)
            self.model = rf_model  # Update instance with trained model
            return "Model trained and uploaded successfully!"
        except Exception as e:
            # The caller only gets a string, so keep the traceback in the log.
            self._log.exception("Training failed")
            return f"Error during training: {e}"

    def retrain_model(self) -> str:
        """Refit the currently loaded model on the training CSV and upload it.

        Returns:
            A success message, a notice that no model is loaded, or
            ``"Error during retraining: ..."`` if any step raised.
        """
        # Check first: without a model there is nothing to refit, so skip
        # the dataset download entirely.
        if self.model is None:
            return "No existing model found. Train a new model first."
        try:
            X_train, y_train = self._load_training_data()
            # fit() updates self.model in place, before the upload happens.
            self.model.fit(X_train, y_train)
            self._save_and_upload(self.model)
            return "Model retrained and uploaded successfully!"
        except Exception as e:
            self._log.exception("Retraining failed")
            return f"Error during retraining: {e}"

    def predict(self, input_data):
        """Make a prediction for a single record using the loaded model.

        Args:
            input_data: One record; it is wrapped in a list and turned into
                a one-row ``DataFrame`` (e.g. a mapping of column -> value).

        Returns:
            The first prediction converted to ``int``, or a message string
            when no model is loaded.
        """
        if self.model is None:
            return "No model found. Train the model first."
        input_df = pd.DataFrame([input_data])
        return int(self.model.predict(input_df)[0])