"""Custom feature transformers plus a helper that scores one record.

The transformer classes are defined at module level; presumably the pickled
pipeline loaded below references them, so they must stay importable under
these names -- confirm against the training script.
"""

import joblib
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.cluster import KMeans
from sklearn.preprocessing import OneHotEncoder


class UnitPriceTransformer(BaseEstimator, TransformerMixin):
    """Derive a ``unit_price`` column as ``sales / quantity``."""

    def fit(self, X, y=None):
        """Do nothing and return ``self``; the transformer holds no state."""
        return self

    def transform(self, X):
        """Return a one-column DataFrame ``unit_price`` indexed like ``X``.

        ``X`` must provide ``sales`` and ``quantity`` columns. The input is
        never mutated, so no defensive copy of the whole frame is needed.
        """
        return (X['sales'] / X['quantity']).to_frame('unit_price')


class KMeansAndLabelTransformer(BaseEstimator, TransformerMixin):
    """Cluster rows on ``unit_price`` and tag each with its sub-category.

    The produced label has the form ``"<cluster id>_<sub_category>"``.
    """

    def __init__(self, n_clusters=3):
        self.n_clusters = n_clusters
        # The attribute name ``kmeans`` is kept so that instances pickled
        # with the earlier version of this class still work after loading.
        self.kmeans = KMeans(n_clusters=n_clusters, random_state=42)

    def fit(self, X, y=None):
        """Fit the wrapped KMeans on the ``unit_price`` column of ``X``."""
        # set_params()/clone() only touch self.n_clusters; push the current
        # value into the wrapped estimator so tuning actually takes effect.
        self.kmeans.set_params(n_clusters=self.n_clusters)
        self.kmeans.fit(X[['unit_price']])
        return self

    def transform(self, X):
        """Return a one-column DataFrame ``distinct_cluster_label``.

        ``X`` must provide ``unit_price`` and ``sub_category`` columns; the
        result keeps the index of ``X``.
        """
        # Go through a Series before concatenating: adding a Python str to a
        # NumPy array of strings raises a "no loop" ufunc error on NumPy 1.x.
        cluster_ids = pd.Series(
            self.kmeans.predict(X[['unit_price']]), index=X.index
        )
        labels = cluster_ids.astype(str) + "_" + X['sub_category']
        return labels.to_frame('distinct_cluster_label')


class DynamicOneHotEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode ``distinct_cluster_label``, ignoring unseen values."""

    def __init__(self):
        # handle_unknown='ignore' makes categories unseen during fit encode
        # as an all-zero row instead of raising.
        self.encoder = OneHotEncoder(handle_unknown='ignore')

    def fit(self, X, y=None):
        """Learn the categories of the ``distinct_cluster_label`` column."""
        self.encoder.fit(X[['distinct_cluster_label']])
        return self

    def transform(self, X):
        """Return a dense DataFrame with one column per learned category.

        The result carries the index of ``X`` (matching the other
        transformers in this module) so it can be aligned or concatenated
        with frames derived from the same input.
        """
        encoded = self.encoder.transform(X[['distinct_cluster_label']]).toarray()
        columns = self.encoder.get_feature_names_out(['distinct_cluster_label'])
        return pd.DataFrame(encoded, columns=columns, index=X.index)


# Load the pipeline and model once, at import time.
# NOTE: joblib.load unpickles arbitrary Python objects -- only point these
# paths at artifacts from a trusted source.
pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
model = joblib.load('best_model.pkl')


def make_prediction(input_features):
    """Run one record through the loaded pipeline and model.

    Args:
        input_features: A single record, wrapped in a one-element list to
            build a one-row DataFrame. Presumably a mapping of column name
            to value -- confirm against the caller.

    Returns:
        The first element of ``model.predict`` for that row.
    """
    row = pd.DataFrame([input_features])
    processed_features = pipeline.transform(row)
    prediction = model.predict(processed_features)
    return prediction[0]