chkp-talexm
committed on
Commit
·
6c67532
1
Parent(s):
18e26de
update
Browse files- app.py +22 -133
- modelConnector.py +20 -17
- requirements.txt +1 -0
app.py
CHANGED
@@ -1,140 +1,17 @@
|
|
1 |
-
# import streamlit as st
|
2 |
-
# import pandas as pd
|
3 |
-
# import joblib
|
4 |
-
# from huggingface_hub import hf_hub_download
|
5 |
-
#
|
6 |
-
# from modelConnector import ModelConnector
|
7 |
-
#
|
8 |
-
# # ===========================
|
9 |
-
# # LOAD MODEL & DATASET
|
10 |
-
# # ===========================
|
11 |
-
#
|
12 |
-
# st.title("π Is Click Predictor")
|
13 |
-
#
|
14 |
-
# # Download and load the trained model from Hugging Face
|
15 |
-
# model_path = hf_hub_download(repo_id="taimax13/is_click_predictor", filename="rf_model.pkl")
|
16 |
-
# rf_model = joblib.load(model_path)
|
17 |
-
# st.success("✅ Model Loaded Successfully!")
|
18 |
-
#
|
19 |
-
# # ===========================
|
20 |
-
# # LOAD DATA FROM HUGGING FACE
|
21 |
-
# # ===========================
|
22 |
-
#
|
23 |
-
# st.sidebar.header("Dataset Selection")
|
24 |
-
#
|
25 |
-
# # Download required dataset files
|
26 |
-
# X_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="X_test_1st(1).csv")
|
27 |
-
# y_test_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="y_test_1st.csv")
|
28 |
-
# train_data_path = hf_hub_download(repo_id="taimax13/is_click_data", filename="train_dataset_full - train_dataset_full (1).csv")
|
29 |
-
#
|
30 |
-
# # Load datasets
|
31 |
-
# X_test = pd.read_csv(X_test_path)
|
32 |
-
# y_test = pd.read_csv(y_test_path, header=None) # Ensure labels match test dataset index
|
33 |
-
# train_data = pd.read_csv(train_data_path)
|
34 |
-
#
|
35 |
-
# st.info(f"✅ Loaded datasets: **Train: {len(train_data)} rows**, **Test: {len(X_test)} rows**")
|
36 |
-
#
|
37 |
-
#
|
38 |
-
#
|
39 |
-
# # Initialize Model Connector
|
40 |
-
# model_connector = ModelConnector()
|
41 |
-
#
|
42 |
-
# st.title("π Is Click Predictor - Train, Retrain, and Predict")
|
43 |
-
#
|
44 |
-
# # ===========================
|
45 |
-
# # CHECK MODEL STATUS
|
46 |
-
# # ===========================
|
47 |
-
#
|
48 |
-
# if model_connector.model:
|
49 |
-
# st.success("✅ Model Loaded Successfully!")
|
50 |
-
# else:
|
51 |
-
# st.warning("β No model found. Please train one first.")
|
52 |
-
#
|
53 |
-
# # ===========================
|
54 |
-
# # TRAIN MODEL IF NOT FOUND
|
55 |
-
# # ===========================
|
56 |
-
#
|
57 |
-
# if st.button("π Train Model"):
|
58 |
-
# st.info("π Training model...")
|
59 |
-
# message = model_connector.train_model()
|
60 |
-
# st.success(message)
|
61 |
-
#
|
62 |
-
# # ===========================
|
63 |
-
# # RETRAIN MODEL
|
64 |
-
# # ===========================
|
65 |
-
#
|
66 |
-
# if st.button("π Retrain Model"):
|
67 |
-
# st.info("π Retraining model with latest data...")
|
68 |
-
# message = model_connector.retrain_model()
|
69 |
-
# st.success(message)
|
70 |
-
#
|
71 |
-
#
|
72 |
-
# # ===========================
|
73 |
-
# # SELECT A DATA SAMPLE
|
74 |
-
# # ===========================
|
75 |
-
#
|
76 |
-
# st.sidebar.header("Select a Test Sample for Prediction")
|
77 |
-
#
|
78 |
-
# # Merge X_test with y_test for selection (without labels affecting prediction)
|
79 |
-
# X_test["actual_click"] = y_test.values
|
80 |
-
#
|
81 |
-
# # Allow user to pick a row
|
82 |
-
# selected_index = st.sidebar.selectbox("Choose a test sample index", X_test.index)
|
83 |
-
# selected_row = X_test.loc[selected_index].drop("actual_click") # Exclude actual label
|
84 |
-
#
|
85 |
-
# # Display selected row
|
86 |
-
# st.write("### Selected Data Sample:")
|
87 |
-
# st.dataframe(selected_row.to_frame().T) # Display as a table
|
88 |
-
#
|
89 |
-
#
|
90 |
-
#
|
91 |
-
# # ===========================
|
92 |
-
# # MAKE PREDICTION & EXPORT CSV
|
93 |
-
# # ===========================
|
94 |
-
#
|
95 |
-
# if st.button("Predict Click"):
|
96 |
-
# # Convert selected row to DataFrame for model input
|
97 |
-
# input_data = selected_row.to_frame().T
|
98 |
-
#
|
99 |
-
# # Make prediction
|
100 |
-
# prediction = rf_model.predict(input_data)[0]
|
101 |
-
#
|
102 |
-
# # Add prediction to DataFrame
|
103 |
-
# input_data["is_click_predicted"] = prediction
|
104 |
-
#
|
105 |
-
# # Save prediction as CSV
|
106 |
-
# csv_filename = "prediction_result.csv"
|
107 |
-
# input_data.to_csv(csv_filename, index=False)
|
108 |
-
#
|
109 |
-
# # Display Prediction Result
|
110 |
-
# st.subheader("Prediction Result")
|
111 |
-
# if prediction == 1:
|
112 |
-
# st.success("🟢 The model predicts: **User WILL CLICK on the ad!**")
|
113 |
-
# else:
|
114 |
-
# st.warning("🔴 The model predicts: **User WILL NOT CLICK on the ad.**")
|
115 |
-
#
|
116 |
-
# # Provide download button for prediction result
|
117 |
-
# st.download_button(
|
118 |
-
# label="📥 Download Prediction Result",
|
119 |
-
# data=input_data.to_csv(index=False).encode("utf-8"),
|
120 |
-
# file_name="prediction_result.csv",
|
121 |
-
# mime="text/csv",
|
122 |
-
# )
|
123 |
-
#
|
124 |
-
# st.markdown("---")
|
125 |
-
# st.info("Select a test row from the **left panel**, click **'Predict Click'**, and download the prediction result as a CSV.")
|
126 |
-
|
127 |
-
|
128 |
import streamlit as st
|
129 |
import pandas as pd
|
130 |
import os
|
131 |
-
import
|
132 |
|
133 |
-
#
|
|
|
|
|
134 |
UPLOAD_DIR = "uploaded_files"
|
|
|
|
|
135 |
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
136 |
|
137 |
-
st.title("Is_click
|
138 |
|
139 |
# Step 1: Upload File
|
140 |
st.header("Upload a File")
|
@@ -151,7 +28,20 @@ if uploaded_file and file_name:
|
|
151 |
|
152 |
st.success(f"File saved as **{file_name}.{file_extension}**")
|
153 |
|
154 |
-
# Step 2:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
155 |
st.header("View and Download File")
|
156 |
|
157 |
files = os.listdir(UPLOAD_DIR)
|
@@ -162,7 +52,7 @@ if selected_file and selected_file != "No files available":
|
|
162 |
|
163 |
# Check file size
|
164 |
file_size = os.path.getsize(file_path) / (1024 * 1024) # Convert to MB
|
165 |
-
if file_size > 1:
|
166 |
st.warning("File is too large. Displaying only the first 5 rows.")
|
167 |
|
168 |
# Read and display content
|
@@ -177,4 +67,3 @@ if selected_file and selected_file != "No files available":
|
|
177 |
# Button to download full file
|
178 |
with open(file_path, "rb") as f:
|
179 |
st.download_button("Download Full File", f, file_name=selected_file)
|
180 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
import os
|
4 |
+
from huggingface_hub import HfApi
|
5 |
|
6 |
+
# Hugging Face Dataset Repository Info
|
7 |
+
HF_DATASET_REPO = "chagu13/is_click_data"
|
8 |
+
HF_USER = "chagu13" # Your HF username (required for authentication)
|
9 |
UPLOAD_DIR = "uploaded_files"
|
10 |
+
|
11 |
+
# Ensure the directory exists
|
12 |
os.makedirs(UPLOAD_DIR, exist_ok=True)
|
13 |
|
14 |
+
st.title("Is_click Predictor - Upload to Hugging Face")
|
15 |
|
16 |
# Step 1: Upload File
|
17 |
st.header("Upload a File")
|
|
|
28 |
|
29 |
st.success(f"File saved as **{file_name}.{file_extension}**")
|
30 |
|
31 |
+
# Step 2: Upload to Hugging Face
|
32 |
+
if st.button("Upload to Hugging Face"):
|
33 |
+
api = HfApi()
|
34 |
+
try:
|
35 |
+
api.upload_file(
|
36 |
+
path_or_fileobj=file_path,
|
37 |
+
path_in_repo=f"{file_name}.{file_extension}",
|
38 |
+
repo_id=HF_DATASET_REPO,
|
39 |
+
)
|
40 |
+
st.success(f"✅ Successfully uploaded to Hugging Face: {HF_DATASET_REPO}/{file_name}.{file_extension}")
|
41 |
+
except Exception as e:
|
42 |
+
st.error(f"β Failed to upload: {e}")
|
43 |
+
|
44 |
+
# Step 3: Display and Download File
|
45 |
st.header("View and Download File")
|
46 |
|
47 |
files = os.listdir(UPLOAD_DIR)
|
|
|
52 |
|
53 |
# Check file size
|
54 |
file_size = os.path.getsize(file_path) / (1024 * 1024) # Convert to MB
|
55 |
+
if file_size > 1:
|
56 |
st.warning("File is too large. Displaying only the first 5 rows.")
|
57 |
|
58 |
# Read and display content
|
|
|
67 |
# Button to download full file
|
68 |
with open(file_path, "rb") as f:
|
69 |
st.download_button("Download Full File", f, file_name=selected_file)
|
|
modelConnector.py
CHANGED
@@ -2,13 +2,16 @@ import os
|
|
2 |
import joblib
|
3 |
import pandas as pd
|
4 |
from huggingface_hub import hf_hub_download, HfApi
|
5 |
-
from model_trainer import train_models
|
|
|
|
|
|
|
6 |
|
7 |
# Hugging Face Model & Dataset Information
|
8 |
MODEL_REPO = "taimax13/is_click_predictor"
|
9 |
MODEL_FILENAME = "rf_model.pkl"
|
10 |
DATA_REPO = "taimax13/is_click_data"
|
11 |
-
LOCAL_MODEL_PATH = f"
|
12 |
|
13 |
# Hugging Face API
|
14 |
api = HfApi()
|
@@ -17,7 +20,7 @@ api = HfApi()
|
|
17 |
class ModelConnector:
|
18 |
def __init__(self):
|
19 |
"""Initialize model connector and check if model exists."""
|
20 |
-
os.makedirs(
|
21 |
self.model = self.load_model()
|
22 |
|
23 |
def check_model_exists(self):
|
@@ -29,14 +32,14 @@ class ModelConnector:
|
|
29 |
return False
|
30 |
|
31 |
def load_model(self):
|
32 |
-
"""Download and load the model from Hugging Face."""
|
33 |
if self.check_model_exists():
|
34 |
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
|
35 |
return joblib.load(model_path)
|
36 |
return None
|
37 |
|
38 |
def train_model(self):
|
39 |
-
"""Train a new model and upload it to Hugging Face."""
|
40 |
try:
|
41 |
# Load dataset
|
42 |
train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
|
@@ -45,12 +48,12 @@ class ModelConnector:
|
|
45 |
X_train = train_data.drop(columns=["is_click"])
|
46 |
y_train = train_data["is_click"]
|
47 |
|
48 |
-
# Train model
|
49 |
-
models = train_models(X_train, y_train)
|
50 |
-
rf_model = models["RandomForest"]
|
51 |
|
52 |
-
# Save locally
|
53 |
-
|
54 |
|
55 |
# Upload to Hugging Face
|
56 |
api.upload_file(
|
@@ -66,7 +69,7 @@ class ModelConnector:
|
|
66 |
return f"Error during training: {str(e)}"
|
67 |
|
68 |
def retrain_model(self):
|
69 |
-
"""Retrain the existing model with new data."""
|
70 |
try:
|
71 |
# Load dataset
|
72 |
train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
|
@@ -78,11 +81,11 @@ class ModelConnector:
|
|
78 |
if self.model is None:
|
79 |
return "No existing model found. Train a new model first."
|
80 |
|
81 |
-
# Retrain
|
82 |
self.model.fit(X_train, y_train)
|
83 |
|
84 |
-
# Save
|
85 |
-
|
86 |
api.upload_file(
|
87 |
path_or_fileobj=LOCAL_MODEL_PATH,
|
88 |
path_in_repo=MODEL_FILENAME,
|
@@ -95,10 +98,10 @@ class ModelConnector:
|
|
95 |
return f"Error during retraining: {str(e)}"
|
96 |
|
97 |
def predict(self, input_data):
|
98 |
-
"""Make predictions using
|
99 |
if self.model is None:
|
100 |
return "No model found. Train the model first."
|
101 |
|
102 |
input_df = pd.DataFrame([input_data])
|
103 |
-
prediction = self.model
|
104 |
-
return int(prediction)
|
|
|
2 |
import joblib
|
3 |
import pandas as pd
|
4 |
from huggingface_hub import hf_hub_download, HfApi
|
5 |
+
from is_click_predictor.model_trainer import train_models
|
6 |
+
from is_click_predictor.model_manager import save_models, load_models
|
7 |
+
from is_click_predictor.model_predictor import predict
|
8 |
+
from is_click_predictor.config import MODEL_DIR # Ensure consistency
|
9 |
|
10 |
# Hugging Face Model & Dataset Information
|
11 |
MODEL_REPO = "taimax13/is_click_predictor"
|
12 |
MODEL_FILENAME = "rf_model.pkl"
|
13 |
DATA_REPO = "taimax13/is_click_data"
|
14 |
+
LOCAL_MODEL_PATH = f"{MODEL_DIR}/{MODEL_FILENAME}" # Use config path
|
15 |
|
16 |
# Hugging Face API
|
17 |
api = HfApi()
|
|
|
20 |
class ModelConnector:
|
21 |
def __init__(self):
|
22 |
"""Initialize model connector and check if model exists."""
|
23 |
+
os.makedirs(MODEL_DIR, exist_ok=True) # Ensure directory exists
|
24 |
self.model = self.load_model()
|
25 |
|
26 |
def check_model_exists(self):
|
|
|
32 |
return False
|
33 |
|
34 |
def load_model(self):
|
35 |
+
"""Download and load the model from Hugging Face using is_click_predictor."""
|
36 |
if self.check_model_exists():
|
37 |
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILENAME)
|
38 |
return joblib.load(model_path)
|
39 |
return None
|
40 |
|
41 |
def train_model(self):
|
42 |
+
"""Train a new model using is_click_predictor and upload it to Hugging Face."""
|
43 |
try:
|
44 |
# Load dataset
|
45 |
train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
|
|
|
48 |
X_train = train_data.drop(columns=["is_click"])
|
49 |
y_train = train_data["is_click"]
|
50 |
|
51 |
+
# Train model using `is_click_predictor`
|
52 |
+
models = train_models(X_train, y_train) # Uses RandomForest, CatBoost, XGBoost
|
53 |
+
rf_model = models["RandomForest"] # Use RF as default
|
54 |
|
55 |
+
# Save locally using `is_click_predictor`
|
56 |
+
save_models(models)
|
57 |
|
58 |
# Upload to Hugging Face
|
59 |
api.upload_file(
|
|
|
69 |
return f"Error during training: {str(e)}"
|
70 |
|
71 |
def retrain_model(self):
|
72 |
+
"""Retrain the existing model with new data using is_click_predictor."""
|
73 |
try:
|
74 |
# Load dataset
|
75 |
train_data_path = hf_hub_download(repo_id=DATA_REPO, filename="train_dataset_full.csv")
|
|
|
81 |
if self.model is None:
|
82 |
return "No existing model found. Train a new model first."
|
83 |
|
84 |
+
# Retrain using is_click_predictor
|
85 |
self.model.fit(X_train, y_train)
|
86 |
|
87 |
+
# Save and upload
|
88 |
+
save_models({"RandomForest": self.model})
|
89 |
api.upload_file(
|
90 |
path_or_fileobj=LOCAL_MODEL_PATH,
|
91 |
path_in_repo=MODEL_FILENAME,
|
|
|
98 |
return f"Error during retraining: {str(e)}"
|
99 |
|
100 |
def predict(self, input_data):
|
101 |
+
"""Make predictions using is_click_predictor."""
|
102 |
if self.model is None:
|
103 |
return "No model found. Train the model first."
|
104 |
|
105 |
input_df = pd.DataFrame([input_data])
|
106 |
+
prediction = predict({"RandomForest": self.model}, input_df) # Use predict function
|
107 |
+
return int(prediction[0])
|
requirements.txt
CHANGED
@@ -2,3 +2,4 @@ streamlit
|
|
2 |
pandas
|
3 |
joblib
|
4 |
huggingface_hub
|
|
|
|
2 |
pandas
|
3 |
joblib
|
4 |
huggingface_hub
|
5 |
+
is_click_predictor
|