Spaces:
Sleeping
Sleeping
import joblib | |
import pandas as pd | |
from sklearn.base import BaseEstimator, TransformerMixin | |
from sklearn.preprocessing import OneHotEncoder | |
from sklearn.cluster import KMeans | |
# Custom Transformer: UnitPriceTransformer
class UnitPriceTransformer(BaseEstimator, TransformerMixin):
    """Stateless transformer that derives a per-unit price feature.

    Reads the ``sales`` and ``quantity`` columns of the incoming frame and
    emits a one-column frame named ``unit_price``.
    """

    def fit(self, X, y=None):
        """Learn nothing; return ``self`` so the step can sit in a pipeline."""
        return self

    def transform(self, X):
        """Return ``sales / quantity`` as a single ``unit_price`` column.

        Args:
            X: Frame providing ``sales`` and ``quantity`` columns.

        Returns:
            A DataFrame holding only the ``unit_price`` column, indexed like
            ``X``.
        """
        # ``assign`` works on a fresh copy, so the caller's frame is untouched.
        # NOTE: a zero ``quantity`` is not guarded against here.
        with_price = X.assign(unit_price=X['sales'] / X['quantity'])
        return with_price[['unit_price']]
# Custom Transformer: KMeansAndLabelTransformer
class KMeansAndLabelTransformer(BaseEstimator, TransformerMixin):
    """Cluster rows on ``unit_price`` and label them ``"<cluster>_<sub_category>"``.

    Args:
        n_clusters: Number of clusters for the wrapped ``KMeans`` model.

    Attributes:
        kmeans: The wrapped ``KMeans`` estimator (``random_state=42``).
    """

    def __init__(self, n_clusters=3):
        self.n_clusters = n_clusters
        # Created here and kept under the name ``kmeans`` so that instances
        # pickled with this attribute keep working after unpickling.
        self.kmeans = KMeans(n_clusters=n_clusters, random_state=42)

    def fit(self, X, y=None):
        """Fit the wrapped ``KMeans`` on the ``unit_price`` column of ``X``.

        Returns:
            ``self``.
        """
        # ``set_params(n_clusters=...)`` on this transformer only updates the
        # attribute above; push the current value into the wrapped estimator
        # so the change is not silently ignored.
        self.kmeans.set_params(n_clusters=self.n_clusters)
        self.kmeans.fit(X[['unit_price']])
        return self

    def transform(self, X):
        """Build the combined cluster / sub-category label for each row.

        Args:
            X: Frame providing ``unit_price`` and ``sub_category`` columns.

        Returns:
            A DataFrame holding only the ``distinct_cluster_label`` column,
            indexed like ``X``.
        """
        cluster_ids = self.kmeans.predict(X[['unit_price']])
        # Concatenate through a pandas Series rather than on the raw NumPy
        # string array: ``ndarray_of_str + "_"`` depends on string-ufunc
        # support and raises a ufunc TypeError on older NumPy releases (1.x).
        cluster_text = pd.Series(cluster_ids, index=X.index).astype(str)
        labels = cluster_text + "_" + X['sub_category']
        return labels.to_frame(name='distinct_cluster_label')
# Custom Transformer: DynamicOneHotEncoder
class DynamicOneHotEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode the ``distinct_cluster_label`` column.

    Attributes:
        encoder: The wrapped ``OneHotEncoder``, created with
            ``handle_unknown='ignore'``.
    """

    def __init__(self):
        self.encoder = OneHotEncoder(handle_unknown='ignore')

    def fit(self, X, y=None):
        """Fit the wrapped encoder on ``distinct_cluster_label``.

        Returns:
            ``self``.
        """
        self.encoder.fit(X[['distinct_cluster_label']])
        return self

    def transform(self, X):
        """Return the one-hot encoding of ``distinct_cluster_label``.

        Args:
            X: Frame providing a ``distinct_cluster_label`` column.

        Returns:
            A dense DataFrame with one column per encoder output feature,
            carrying the same row index as ``X``.
        """
        # X is only read, so no defensive copy is needed.
        # ``.toarray()`` relies on the encoder producing a sparse matrix,
        # which is what the encoder built in ``__init__`` does by default.
        dense = self.encoder.transform(X[['distinct_cluster_label']]).toarray()
        feature_names = self.encoder.get_feature_names_out(['distinct_cluster_label'])
        # Keep the caller's row index (as the other transformers in this file
        # do) so the result still lines up with X when combined by index.
        return pd.DataFrame(dense, columns=feature_names, index=X.index)
# Load the pipeline and model once, at import time.
# NOTE(review): joblib.load unpickles arbitrary objects -- only point these
# paths at artifacts from a trusted source.
# NOTE(review): presumably the pickled pipeline references the transformer
# classes defined in this file, so they must exist under the same names
# before loading -- confirm against the training script.
_PIPELINE_FILE = 'full_pipeline_with_unit_price.pkl'
_MODEL_FILE = 'best_model.pkl'

pipeline = joblib.load(_PIPELINE_FILE)
model = joblib.load(_MODEL_FILE)
def make_prediction(input_features):
    """Run one record through the preprocessing pipeline and the model.

    Args:
        input_features: A single record. It is wrapped in a one-element list
            and passed to ``pd.DataFrame`` (presumably a mapping of column
            name to value -- confirm with callers).

    Returns:
        The first element of the model's prediction output.
    """
    single_row = pd.DataFrame([input_features])
    model_input = pipeline.transform(single_row)
    return model.predict(model_input)[0]