Spaces:

chagu13
/

is_click

Sleeping

App Files Community

chkp-talexm committed on 20 days ago

Commit

6c67532

1 Parent(s): 18e26de

update

Browse files

Files changed (3) hide show

app.py +22 -133
modelConnector.py +20 -17
requirements.txt +1 -0

app.py CHANGED Viewed

@@ -1,140 +1,17 @@
-# import streamlit as st
-# import pandas as pd
-# import joblib
-# from huggingface_hub import hf_hub_download
-#
-# from modelConnector import ModelConnector
-#
-# # ===========================
-# #  LOAD MODEL & DATASET
-# # ===========================
-#
-# st.title("📊 Is Click Predictor")
-#
-# # Download and load the trained model from Hugging Face
-# model_path = hf_hub_download(repo_id="taimax13/is_click_predictor", filename="rf_model.pkl")
-# rf_model = joblib.load(model_path)
-# st.success("✅ Model Loaded Successfully!")
-#
-# # ===========================
-# #  LOAD DATA FROM HUGGING FACE
-# # ===========================
-#
-# st.sidebar.header("Dataset Selection")
-#
-# # Download required dataset files
-# X_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="X_test_1st(1).csv")
-# y_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="y_test_1st.csv")
-# train_data_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="train_dataset_full - train_dataset_full (1).csv")
-#
-# # Load datasets
-# X_test = pd.read_csv(X_test_path)
-# y_test = pd.read_csv(y_test_path, header=None)  # Ensure labels match test dataset index
-# train_data = pd.read_csv(train_data_path)
-#
-# st.info(f"✅ Loaded datasets: **Train: {len(train_data)} rows**, **Test: {len(X_test)} rows**")
-#
-#
-#
-# # Initialize Model Connector
-# model_connector = ModelConnector()
-#
-# st.title("📊 Is Click Predictor - Train, Retrain, and Predict")
-#
-# # ===========================
-# #  CHECK MODEL STATUS
-# # ===========================
-#
-# if model_connector.model:
-#     st.success("✅ Model Loaded Successfully!")
-# else:
-#     st.warning("⚠ No model found. Please train one first.")
-#
-# # ===========================
-# #  TRAIN MODEL IF NOT FOUND
-# # ===========================
-#
-# if st.button("🚀 Train Model"):
-#     st.info("🔄 Training model...")
-#     message = model_connector.train_model()
-#     st.success(message)
-#
-# # ===========================
-# #  RETRAIN MODEL
-# # ===========================
-#
-# if st.button("🔄 Retrain Model"):
-#     st.info("🔄 Retraining model with latest data...")
-#     message = model_connector.retrain_model()
-#     st.success(message)
-#
-#
-# # ===========================
-# #  SELECT A DATA SAMPLE
-# # ===========================
-#
-# st.sidebar.header("Select a Test Sample for Prediction")
-#
-# # Merge X_test with y_test for selection (without labels affecting prediction)
-# X_test["actual_click"] = y_test.values
-#
-# # Allow user to pick a row
-# selected_index = st.sidebar.selectbox("Choose a test sample index", X_test.index)
-# selected_row = X_test.loc[selected_index].drop("actual_click")  # Exclude actual label
-#
-# # Display selected row
-# st.write("### Selected Data Sample:")
-# st.dataframe(selected_row.to_frame().T)  # Display as a table
-#
-#
-#
-# # ===========================
-# #  MAKE PREDICTION & EXPORT CSV
-# # ===========================
-#
-# if st.button("Predict Click"):
-#     # Convert selected row to DataFrame for model input
-#     input_data = selected_row.to_frame().T
-#
-#     # Make prediction
-#     prediction = rf_model.predict(input_data)[0]
-#
-#     # Add prediction to DataFrame
-#     input_data["is_click_predicted"] = prediction
-#
-#     # Save prediction as CSV
-#     csv_filename = "prediction_result.csv"
-#     input_data.to_csv(csv_filename, index=False)
-#
-#     # Display Prediction Result
-#     st.subheader("Prediction Result")
-#     if prediction == 1:
-#         st.success("🟢 The model predicts: **User WILL CLICK on the ad!**")
-#     else:
-#         st.warning("🔴 The model predicts: **User WILL NOT CLICK on the ad.**")
-#
-#     # Provide download button for prediction result
-#     st.download_button(
-#         label="📥 Download Prediction Result",
-#         data=input_data.to_csv(index=False).encode("utf-8"),
-#         file_name="prediction_result.csv",
-#         mime="text/csv",
-#     )
-#
-# st.markdown("---")
-# st.info("Select a test row from the **left panel**, click **'Predict Click'**, and download the prediction result as a CSV.")
 import streamlit as st
 import pandas as pd
 import os
-import uuid
-# Directory to store uploaded files
 UPLOAD_DIR = "uploaded_files"
 os.makedirs(UPLOAD_DIR, exist_ok=True)
-st.title("Is_click predictor")
 # Step 1: Upload File
 st.header("Upload a File")
@@ -151,7 +28,20 @@ if uploaded_file and file_name:
     st.success(f"File saved as **{file_name}.{file_extension}**")
-# Step 2: Display and Download File
 st.header("View and Download File")
 files = os.listdir(UPLOAD_DIR)
@@ -162,7 +52,7 @@ if selected_file and selected_file != "No files available":
     # Check file size
     file_size = os.path.getsize(file_path) / (1024 * 1024)  # Convert to MB
-    if file_size > 1:  # If file size > 1MB, show limited content
         st.warning("File is too large. Displaying only the first 5 rows.")
     # Read and display content
@@ -177,4 +67,3 @@ if selected_file and selected_file != "No files available":
     # Button to download full file
     with open(file_path, "rb") as f:
         st.download_button("Download Full File", f, file_name=selected_file)

 import streamlit as st
 import pandas as pd
 import os
+from huggingface_hub import HfApi
+# Hugging Face Dataset Repository Info
+HF_DATASET_REPO = "chagu13/is_click_data"
+HF_USER = "chagu13"  # Your HF username (required for authentication)
 UPLOAD_DIR = "uploaded_files"
+# Ensure the directory exists
 os.makedirs(UPLOAD_DIR, exist_ok=True)
+st.title("Is_click Predictor - Upload to Hugging Face")
 # Step 1: Upload File
 st.header("Upload a File")
     st.success(f"File saved as **{file_name}.{file_extension}**")
+    # Step 2: Upload to Hugging Face
+    if st.button("Upload to Hugging Face"):
+        api = HfApi()
+        try:
+            api.upload_file(
+                path_or_fileobj=file_path,
+                path_in_repo=f"{file_name}.{file_extension}",
+                repo_id=HF_DATASET_REPO,
+            )
+            st.success(f"✅ Successfully uploaded to Hugging Face: {HF_DATASET_REPO}/{file_name}.{file_extension}")
+        except Exception as e:
+            st.error(f"❌ Failed to upload: {e}")
+# Step 3: Display and Download File
 st.header("View and Download File")
 files = os.listdir(UPLOAD_DIR)
     # Check file size
     file_size = os.path.getsize(file_path) / (1024 * 1024)  # Convert to MB
+    if file_size > 1:
         st.warning("File is too large. Displaying only the first 5 rows.")
     # Read and display content
     # Button to download full file
     with open(file_path, "rb") as f:
         st.download_button("Download Full File", f, file_name=selected_file)

modelConnector.py CHANGED Viewed

@@ -2,13 +2,16 @@ import os
 import joblib
 import pandas as pd
 from huggingface_hub import hf_hub_download, HfApi
-from model_trainer import train_models  # Assumes model_trainer.py exists with train_models function
 # Hugging Face Model & Dataset Information
 MODEL_REPO = "taimax13/is_click_predictor"
 MODEL_FILENAME = "rf_model.pkl"
 DATA_REPO = "taimax13/is_click_data"
-LOCAL_MODEL_PATH = f"models/{MODEL_FILENAME}"
 # Hugging Face API
 api = HfApi()
@@ -17,7 +20,7 @@ api = HfApi()
 class ModelConnector:
     def __init__(self):
         """Initialize model connector and check if model exists."""
-        os.makedirs("models", exist_ok=True)
         self.model = self.load_model()
     def check_model_exists(self):
@@ -29,14 +32,14 @@ class ModelConnector:
             return False
     def load_model(self):
-        """Download and load the model from Hugging Face."""
         if self.check_model_exists():
             model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
             return joblib.load(model_path)
         return None
     def train_model(self):
-        """Train a new model and upload it to Hugging Face."""
         try:
             # Load dataset
             train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
@@ -45,12 +48,12 @@ class ModelConnector:
             X_train = train_data.drop(columns=["is_click"])
             y_train = train_data["is_click"]
-            # Train model
-            models = train_models(X_train, y_train)
-            rf_model = models["RandomForest"]
-            # Save locally
-            joblib.dump(rf_model, LOCAL_MODEL_PATH)
             # Upload to Hugging Face
             api.upload_file(
@@ -66,7 +69,7 @@ class ModelConnector:
             return f"Error during training: {str(e)}"
     def retrain_model(self):
-        """Retrain the existing model with new data."""
         try:
             # Load dataset
             train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
@@ -78,11 +81,11 @@ class ModelConnector:
             if self.model is None:
                 return "No existing model found. Train a new model first."
-            # Retrain the model
             self.model.fit(X_train, y_train)
-            # Save & upload retrained model
-            joblib.dump(self.model, LOCAL_MODEL_PATH)
             api.upload_file(
                 path_or_fileobj=LOCAL_MODEL_PATH,
                 path_in_repo=MODEL_FILENAME,
@@ -95,10 +98,10 @@ class ModelConnector:
             return f"Error during retraining: {str(e)}"
     def predict(self, input_data):
-        """Make predictions using the loaded model."""
         if self.model is None:
             return "No model found. Train the model first."
         input_df = pd.DataFrame([input_data])
-        prediction = self.model.predict(input_df)[0]
-        return int(prediction)

 import joblib
 import pandas as pd
 from huggingface_hub import hf_hub_download, HfApi
+from is_click_predictor.model_trainer import train_models
+from is_click_predictor.model_manager import save_models, load_models
+from is_click_predictor.model_predictor import predict
+from is_click_predictor.config import MODEL_DIR  # Ensure consistency
 # Hugging Face Model & Dataset Information
 MODEL_REPO = "taimax13/is_click_predictor"
 MODEL_FILENAME = "rf_model.pkl"
 DATA_REPO = "taimax13/is_click_data"
+LOCAL_MODEL_PATH = f"{MODEL_DIR}/{MODEL_FILENAME}"  # Use config path
 # Hugging Face API
 api = HfApi()
 class ModelConnector:
     def __init__(self):
         """Initialize model connector and check if model exists."""
+        os.makedirs(MODEL_DIR, exist_ok=True)  # Ensure directory exists
         self.model = self.load_model()
     def check_model_exists(self):
             return False
     def load_model(self):
+        """Download and load the model from Hugging Face using is_click_predictor."""
         if self.check_model_exists():
             model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
             return joblib.load(model_path)
         return None
     def train_model(self):
+        """Train a new model using is_click_predictor and upload it to Hugging Face."""
         try:
             # Load dataset
             train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
             X_train = train_data.drop(columns=["is_click"])
             y_train = train_data["is_click"]
+            # Train model using `is_click_predictor`
+            models = train_models(X_train, y_train)  # Uses RandomForest, CatBoost, XGBoost
+            rf_model = models["RandomForest"]  # Use RF as default
+            # Save locally using `is_click_predictor`
+            save_models(models)
             # Upload to Hugging Face
             api.upload_file(
             return f"Error during training: {str(e)}"
     def retrain_model(self):
+        """Retrain the existing model with new data using is_click_predictor."""
         try:
             # Load dataset
             train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
             if self.model is None:
                 return "No existing model found. Train a new model first."
+            # Retrain using is_click_predictor
             self.model.fit(X_train, y_train)
+            # Save and upload
+            save_models({"RandomForest": self.model})
             api.upload_file(
                 path_or_fileobj=LOCAL_MODEL_PATH,
                 path_in_repo=MODEL_FILENAME,
             return f"Error during retraining: {str(e)}"
     def predict(self, input_data):
+        """Make predictions using is_click_predictor."""
         if self.model is None:
             return "No model found. Train the model first."
         input_df = pd.DataFrame([input_data])
+        prediction = predict({"RandomForest": self.model}, input_df)  # Use predict function
+        return int(prediction[0])

requirements.txt CHANGED Viewed

@@ -2,3 +2,4 @@ streamlit
 pandas
 joblib
 huggingface_hub

 pandas
 joblib
 huggingface_hub
+is_click_predictor