chkp-talexm committed on
Commit
6c67532
·
1 Parent(s): 18e26de
Files changed (3) hide show
  1. app.py +22 -133
  2. modelConnector.py +20 -17
  3. requirements.txt +1 -0
app.py CHANGED
@@ -1,140 +1,17 @@
1
- # import streamlit as st
2
- # import pandas as pd
3
- # import joblib
4
- # from huggingface_hub import hf_hub_download
5
- #
6
- # from modelConnector import ModelConnector
7
- #
8
- # # ===========================
9
- # # LOAD MODEL & DATASET
10
- # # ===========================
11
- #
12
- # st.title("πŸ“Š Is Click Predictor")
13
- #
14
- # # Download and load the trained model from Hugging Face
15
- # model_path = hf_hub_download(repo_id="taimax13/is_click_predictor", filename="rf_model.pkl")
16
- # rf_model = joblib.load(model_path)
17
- # st.success("βœ… Model Loaded Successfully!")
18
- #
19
- # # ===========================
20
- # # LOAD DATA FROM HUGGING FACE
21
- # # ===========================
22
- #
23
- # st.sidebar.header("Dataset Selection")
24
- #
25
- # # Download required dataset files
26
- # X_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="X_test_1st(1).csv")
27
- # y_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="y_test_1st.csv")
28
- # train_data_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="train_dataset_full - train_dataset_full (1).csv")
29
- #
30
- # # Load datasets
31
- # X_test = pd.read_csv(X_test_path)
32
- # y_test = pd.read_csv(y_test_path, header=None) # Ensure labels match test dataset index
33
- # train_data = pd.read_csv(train_data_path)
34
- #
35
- # st.info(f"βœ… Loaded datasets: **Train: {len(train_data)} rows**, **Test: {len(X_test)} rows**")
36
- #
37
- #
38
- #
39
- # # Initialize Model Connector
40
- # model_connector = ModelConnector()
41
- #
42
- # st.title("πŸ“Š Is Click Predictor - Train, Retrain, and Predict")
43
- #
44
- # # ===========================
45
- # # CHECK MODEL STATUS
46
- # # ===========================
47
- #
48
- # if model_connector.model:
49
- # st.success("βœ… Model Loaded Successfully!")
50
- # else:
51
- # st.warning("⚠ No model found. Please train one first.")
52
- #
53
- # # ===========================
54
- # # TRAIN MODEL IF NOT FOUND
55
- # # ===========================
56
- #
57
- # if st.button("πŸš€ Train Model"):
58
- # st.info("πŸ”„ Training model...")
59
- # message = model_connector.train_model()
60
- # st.success(message)
61
- #
62
- # # ===========================
63
- # # RETRAIN MODEL
64
- # # ===========================
65
- #
66
- # if st.button("πŸ”„ Retrain Model"):
67
- # st.info("πŸ”„ Retraining model with latest data...")
68
- # message = model_connector.retrain_model()
69
- # st.success(message)
70
- #
71
- #
72
- # # ===========================
73
- # # SELECT A DATA SAMPLE
74
- # # ===========================
75
- #
76
- # st.sidebar.header("Select a Test Sample for Prediction")
77
- #
78
- # # Merge X_test with y_test for selection (without labels affecting prediction)
79
- # X_test["actual_click"] = y_test.values
80
- #
81
- # # Allow user to pick a row
82
- # selected_index = st.sidebar.selectbox("Choose a test sample index", X_test.index)
83
- # selected_row = X_test.loc[selected_index].drop("actual_click") # Exclude actual label
84
- #
85
- # # Display selected row
86
- # st.write("### Selected Data Sample:")
87
- # st.dataframe(selected_row.to_frame().T) # Display as a table
88
- #
89
- #
90
- #
91
- # # ===========================
92
- # # MAKE PREDICTION & EXPORT CSV
93
- # # ===========================
94
- #
95
- # if st.button("Predict Click"):
96
- # # Convert selected row to DataFrame for model input
97
- # input_data = selected_row.to_frame().T
98
- #
99
- # # Make prediction
100
- # prediction = rf_model.predict(input_data)[0]
101
- #
102
- # # Add prediction to DataFrame
103
- # input_data["is_click_predicted"] = prediction
104
- #
105
- # # Save prediction as CSV
106
- # csv_filename = "prediction_result.csv"
107
- # input_data.to_csv(csv_filename, index=False)
108
- #
109
- # # Display Prediction Result
110
- # st.subheader("Prediction Result")
111
- # if prediction == 1:
112
- # st.success("🟒 The model predicts: **User WILL CLICK on the ad!**")
113
- # else:
114
- # st.warning("πŸ”΄ The model predicts: **User WILL NOT CLICK on the ad.**")
115
- #
116
- # # Provide download button for prediction result
117
- # st.download_button(
118
- # label="πŸ“₯ Download Prediction Result",
119
- # data=input_data.to_csv(index=False).encode("utf-8"),
120
- # file_name="prediction_result.csv",
121
- # mime="text/csv",
122
- # )
123
- #
124
- # st.markdown("---")
125
- # st.info("Select a test row from the **left panel**, click **'Predict Click'**, and download the prediction result as a CSV.")
126
-
127
-
128
  import streamlit as st
129
  import pandas as pd
130
  import os
131
- import uuid
132
 
133
- # Directory to store uploaded files
 
 
134
  UPLOAD_DIR = "uploaded_files"
 
 
135
  os.makedirs(UPLOAD_DIR, exist_ok=True)
136
 
137
- st.title("Is_click predictor")
138
 
139
  # Step 1: Upload File
140
  st.header("Upload a File")
@@ -151,7 +28,20 @@ if uploaded_file and file_name:
151
 
152
  st.success(f"File saved as **{file_name}.{file_extension}**")
153
 
154
- # Step 2: Display and Download File
 
 
 
 
 
 
 
 
 
 
 
 
 
155
  st.header("View and Download File")
156
 
157
  files = os.listdir(UPLOAD_DIR)
@@ -162,7 +52,7 @@ if selected_file and selected_file != "No files available":
162
 
163
  # Check file size
164
  file_size = os.path.getsize(file_path) / (1024 * 1024) # Convert to MB
165
- if file_size > 1: # If file size > 1MB, show limited content
166
  st.warning("File is too large. Displaying only the first 5 rows.")
167
 
168
  # Read and display content
@@ -177,4 +67,3 @@ if selected_file and selected_file != "No files available":
177
  # Button to download full file
178
  with open(file_path, "rb") as f:
179
  st.download_button("Download Full File", f, file_name=selected_file)
180
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import streamlit as st
2
  import pandas as pd
3
  import os
4
+ from huggingface_hub import HfApi
5
 
6
+ # Hugging Face Dataset Repository Info
7
+ HF_DATASET_REPO = "chagu13/is_click_data"
8
+ HF_USER = "chagu13" # Your HF username (required for authentication)
9
  UPLOAD_DIR = "uploaded_files"
10
+
11
+ # Ensure the directory exists
12
  os.makedirs(UPLOAD_DIR, exist_ok=True)
13
 
14
+ st.title("Is_click Predictor - Upload to Hugging Face")
15
 
16
  # Step 1: Upload File
17
  st.header("Upload a File")
 
28
 
29
  st.success(f"File saved as **{file_name}.{file_extension}**")
30
 
31
+ # Step 2: Upload to Hugging Face
32
+ if st.button("Upload to Hugging Face"):
33
+ api = HfApi()
34
+ try:
35
+ api.upload_file(
36
+ path_or_fileobj=file_path,
37
+ path_in_repo=f"{file_name}.{file_extension}",
38
+ repo_id=HF_DATASET_REPO,
39
+ )
40
+ st.success(f"βœ… Successfully uploaded to Hugging Face: {HF_DATASET_REPO}/{file_name}.{file_extension}")
41
+ except Exception as e:
42
+ st.error(f"❌ Failed to upload: {e}")
43
+
44
+ # Step 3: Display and Download File
45
  st.header("View and Download File")
46
 
47
  files = os.listdir(UPLOAD_DIR)
 
52
 
53
  # Check file size
54
  file_size = os.path.getsize(file_path) / (1024 * 1024) # Convert to MB
55
+ if file_size > 1:
56
  st.warning("File is too large. Displaying only the first 5 rows.")
57
 
58
  # Read and display content
 
67
  # Button to download full file
68
  with open(file_path, "rb") as f:
69
  st.download_button("Download Full File", f, file_name=selected_file)
 
modelConnector.py CHANGED
@@ -2,13 +2,16 @@ import os
2
  import joblib
3
  import pandas as pd
4
  from huggingface_hub import hf_hub_download, HfApi
5
- from model_trainer import train_models # Assumes model_trainer.py exists with train_models function
 
 
 
6
 
7
  # Hugging Face Model & Dataset Information
8
  MODEL_REPO = "taimax13/is_click_predictor"
9
  MODEL_FILENAME = "rf_model.pkl"
10
  DATA_REPO = "taimax13/is_click_data"
11
- LOCAL_MODEL_PATH = f"models/{MODEL_FILENAME}"
12
 
13
  # Hugging Face API
14
  api = HfApi()
@@ -17,7 +20,7 @@ api = HfApi()
17
  class ModelConnector:
18
  def __init__(self):
19
  """Initialize model connector and check if model exists."""
20
- os.makedirs("models", exist_ok=True)
21
  self.model = self.load_model()
22
 
23
  def check_model_exists(self):
@@ -29,14 +32,14 @@ class ModelConnector:
29
  return False
30
 
31
  def load_model(self):
32
- """Download and load the model from Hugging Face."""
33
  if self.check_model_exists():
34
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
35
  return joblib.load(model_path)
36
  return None
37
 
38
  def train_model(self):
39
- """Train a new model and upload it to Hugging Face."""
40
  try:
41
  # Load dataset
42
  train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
@@ -45,12 +48,12 @@ class ModelConnector:
45
  X_train = train_data.drop(columns=["is_click"])
46
  y_train = train_data["is_click"]
47
 
48
- # Train model
49
- models = train_models(X_train, y_train)
50
- rf_model = models["RandomForest"]
51
 
52
- # Save locally
53
- joblib.dump(rf_model, LOCAL_MODEL_PATH)
54
 
55
  # Upload to Hugging Face
56
  api.upload_file(
@@ -66,7 +69,7 @@ class ModelConnector:
66
  return f"Error during training: {str(e)}"
67
 
68
  def retrain_model(self):
69
- """Retrain the existing model with new data."""
70
  try:
71
  # Load dataset
72
  train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
@@ -78,11 +81,11 @@ class ModelConnector:
78
  if self.model is None:
79
  return "No existing model found. Train a new model first."
80
 
81
- # Retrain the model
82
  self.model.fit(X_train, y_train)
83
 
84
- # Save & upload retrained model
85
- joblib.dump(self.model, LOCAL_MODEL_PATH)
86
  api.upload_file(
87
  path_or_fileobj=LOCAL_MODEL_PATH,
88
  path_in_repo=MODEL_FILENAME,
@@ -95,10 +98,10 @@ class ModelConnector:
95
  return f"Error during retraining: {str(e)}"
96
 
97
  def predict(self, input_data):
98
- """Make predictions using the loaded model."""
99
  if self.model is None:
100
  return "No model found. Train the model first."
101
 
102
  input_df = pd.DataFrame([input_data])
103
- prediction = self.model.predict(input_df)[0]
104
- return int(prediction)
 
2
  import joblib
3
  import pandas as pd
4
  from huggingface_hub import hf_hub_download, HfApi
5
+ from is_click_predictor.model_trainer import train_models
6
+ from is_click_predictor.model_manager import save_models, load_models
7
+ from is_click_predictor.model_predictor import predict
8
+ from is_click_predictor.config import MODEL_DIR # Ensure consistency
9
 
10
  # Hugging Face Model & Dataset Information
11
  MODEL_REPO = "taimax13/is_click_predictor"
12
  MODEL_FILENAME = "rf_model.pkl"
13
  DATA_REPO = "taimax13/is_click_data"
14
+ LOCAL_MODEL_PATH = f"{MODEL_DIR}/{MODEL_FILENAME}" # Use config path
15
 
16
  # Hugging Face API
17
  api = HfApi()
 
20
  class ModelConnector:
21
  def __init__(self):
22
  """Initialize model connector and check if model exists."""
23
+ os.makedirs(MODEL_DIR, exist_ok=True) # Ensure directory exists
24
  self.model = self.load_model()
25
 
26
  def check_model_exists(self):
 
32
  return False
33
 
34
  def load_model(self):
35
+ """Download and load the model from Hugging Face using is_click_predictor."""
36
  if self.check_model_exists():
37
  model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
38
  return joblib.load(model_path)
39
  return None
40
 
41
  def train_model(self):
42
+ """Train a new model using is_click_predictor and upload it to Hugging Face."""
43
  try:
44
  # Load dataset
45
  train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
 
48
  X_train = train_data.drop(columns=["is_click"])
49
  y_train = train_data["is_click"]
50
 
51
+ # Train model using `is_click_predictor`
52
+ models = train_models(X_train, y_train) # Uses RandomForest, CatBoost, XGBoost
53
+ rf_model = models["RandomForest"] # Use RF as default
54
 
55
+ # Save locally using `is_click_predictor`
56
+ save_models(models)
57
 
58
  # Upload to Hugging Face
59
  api.upload_file(
 
69
  return f"Error during training: {str(e)}"
70
 
71
  def retrain_model(self):
72
+ """Retrain the existing model with new data using is_click_predictor."""
73
  try:
74
  # Load dataset
75
  train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
 
81
  if self.model is None:
82
  return "No existing model found. Train a new model first."
83
 
84
+ # Retrain using is_click_predictor
85
  self.model.fit(X_train, y_train)
86
 
87
+ # Save and upload
88
+ save_models({"RandomForest": self.model})
89
  api.upload_file(
90
  path_or_fileobj=LOCAL_MODEL_PATH,
91
  path_in_repo=MODEL_FILENAME,
 
98
  return f"Error during retraining: {str(e)}"
99
 
100
  def predict(self, input_data):
101
+ """Make predictions using is_click_predictor."""
102
  if self.model is None:
103
  return "No model found. Train the model first."
104
 
105
  input_df = pd.DataFrame([input_data])
106
+ prediction = predict({"RandomForest": self.model}, input_df) # Use predict function
107
+ return int(prediction[0])
requirements.txt CHANGED
@@ -2,3 +2,4 @@ streamlit
2
  pandas
3
  joblib
4
  huggingface_hub
 
 
2
  pandas
3
  joblib
4
  huggingface_hub
5
+ is_click_predictor