File size: 2,201 Bytes
51abf05
 
f844302
 
 
 
5a56710
f844302
 
 
 
 
5a56710
f844302
5a56710
f844302
5a56710
f844302
 
 
 
 
 
 
 
 
 
5a56710
f844302
5a56710
 
f844302
5a56710
f844302
5a56710
f844302
5a56710
 
f844302
 
 
 
5a56710
f844302
5a56710
f844302
5a56710
51abf05
5a56710
51abf05
 
 
8c9dd24
5a56710
51abf05
5a56710
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
import joblib
import pandas as pd
from sklearn.base import BaseEstimator, TransformerMixin
from sklearn.preprocessing import OneHotEncoder
from sklearn.cluster import KMeans

# Custom Transformer: UnitPriceTransformer
class UnitPriceTransformer(BaseEstimator, TransformerMixin):
    """Derive a single ``unit_price`` feature as ``sales / quantity``.

    The transformer is stateless: ``fit`` learns nothing, and ``transform``
    returns a one-column DataFrame that keeps the index of its input.
    There is no guard against a zero or missing ``quantity``; whatever the
    division yields for such rows is passed through unchanged.
    """

    def fit(self, X, y=None):
        """Do nothing and return ``self`` (present for pipeline compatibility)."""
        return self

    def transform(self, X):
        """Return a DataFrame whose only column is ``unit_price``.

        ``X`` must provide ``sales`` and ``quantity`` columns. The input is
        not modified.
        """
        # The division produces a new Series, so the caller's frame is untouched.
        per_unit = X['sales'] / X['quantity']
        return per_unit.to_frame(name='unit_price')

# Custom Transformer: KMeansAndLabelTransformer
class KMeansAndLabelTransformer(BaseEstimator, TransformerMixin):
    """Cluster rows on ``unit_price`` and label each row ``<cluster>_<sub_category>``.

    Parameters
    ----------
    n_clusters : int, default=3
        Number of clusters used by the underlying ``KMeans`` model.

    Attributes
    ----------
    kmeans : KMeans
        The wrapped clustering model (``random_state=42``). It is fitted by
        ``fit`` and used for prediction by ``transform``.
    """

    def __init__(self, n_clusters=3):
        self.n_clusters = n_clusters
        # Kept as an instance attribute so that previously pickled
        # transformers (which store ``kmeans``) keep working.
        self.kmeans = KMeans(n_clusters=n_clusters, random_state=42)

    def fit(self, X, y=None):
        """Fit the clustering model on the ``unit_price`` column of ``X``."""
        # ``set_params(n_clusters=...)`` only updates ``self.n_clusters``;
        # push the current value into the wrapped model so it is honoured.
        self.kmeans.set_params(n_clusters=self.n_clusters)
        self.kmeans.fit(X[['unit_price']])
        return self

    def transform(self, X):
        """Return a one-column DataFrame named ``distinct_cluster_label``.

        ``X`` must provide ``unit_price`` and ``sub_category`` columns. Each
        label is the predicted cluster id, an underscore, and the row's
        ``sub_category``. The input is not modified and its index is kept.
        """
        cluster_ids = self.kmeans.predict(X[['unit_price']])
        # Concatenate with pandas rather than with ``+`` on a NumPy unicode
        # array: the latter is not supported by every NumPy version. Building
        # the Series on X's index keeps the labels aligned with their rows.
        cluster_text = pd.Series(cluster_ids, index=X.index).astype(str)
        labels = cluster_text + "_" + X['sub_category']
        return labels.to_frame(name='distinct_cluster_label')

# Custom Transformer: DynamicOneHotEncoder
class DynamicOneHotEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode the ``distinct_cluster_label`` column into a dense DataFrame.

    Attributes
    ----------
    encoder : OneHotEncoder
        The wrapped encoder, created with ``handle_unknown='ignore'``.
    """

    def __init__(self):
        # Kept as an instance attribute so that previously pickled
        # transformers (which store ``encoder``) keep working.
        self.encoder = OneHotEncoder(handle_unknown='ignore')

    def fit(self, X, y=None):
        """Fit the encoder on the ``distinct_cluster_label`` column of ``X``."""
        self.encoder.fit(X[['distinct_cluster_label']])
        return self

    def transform(self, X):
        """Return the one-hot encoding of ``distinct_cluster_label``.

        The result is a new DataFrame whose columns are named by the
        encoder's ``get_feature_names_out`` and whose index matches ``X``,
        so it can be combined row-for-row with other frames derived from
        the same input. ``X`` itself is not modified.
        """
        encoded = self.encoder.transform(X[['distinct_cluster_label']])
        # The encoder returns a sparse matrix by default; densify it, but
        # tolerate an encoder that already produces a dense array.
        if hasattr(encoded, 'toarray'):
            encoded = encoded.toarray()
        column_names = self.encoder.get_feature_names_out(['distinct_cluster_label'])
        # Reuse X's index instead of a fresh RangeIndex so rows stay aligned
        # when the input has a non-default index.
        return pd.DataFrame(encoded, columns=column_names, index=X.index)

# Load the preprocessing pipeline and the model once, at import time.
# Both file names are relative paths, so they are resolved against the
# process's current working directory.
# NOTE(review): joblib.load unpickles the file contents; only load artifacts
# that come from a trusted source.
# NOTE(review): the pickled pipeline presumably references the custom
# transformer classes defined above, which is why they must be defined
# before these loads run -- confirm against the training code.
pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
model = joblib.load('best_model.pkl')

def make_prediction(input_features):
    """Run one record through the loaded pipeline and model.

    ``input_features`` is wrapped in a list and turned into a DataFrame,
    transformed by the module-level ``pipeline``, and passed to the
    module-level ``model``. The first element of the model's prediction
    output is returned.
    """
    # Wrapping the record in a list makes pandas build a single-row frame.
    single_row = pd.DataFrame([input_features])
    return model.predict(pipeline.transform(single_row))[0]