Spaces:

7sugiwa
/

profitboost

Sleeping

App Files Files Community

7sugiwa committed on Feb 8

Commit

0ec3d9b

•

1 Parent(s): e24448f

Upload 7 files

Browse files

Files changed (4) hide show

full_pipeline_with_unit_price.pkl +2 -2
prediction.py +12 -3
preprocessor.pkl +2 -2
transformers.py +44 -2

full_pipeline_with_unit_price.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c0d7e033c7f305bfd4da107e53afd53a73c5ab9af4a335fd39bc5bad88e546c4
-size 37369

 version https://git-lfs.github.com/spec/v1
+oid sha256:b44c045125a5dc057c0b560b07ef9db20adefc79884a33ffeffd239c8c03b2d4
+size 34361

prediction.py CHANGED Viewed

@@ -1,9 +1,18 @@
-import joblib
 import pandas as pd
 # Load the pipeline and model
-pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
-model = joblib.load('best_model.pkl')
 def make_prediction(input_features):
     # Assuming input_features is a DataFrame with the correct structure

+import pickle
 import pandas as pd
 # Load the pipeline and model
+# Load the pipeline object from the file
+with open('full_pipeline_with_unit_price.pkl', 'rb') as file:
+    pipeline = pickle.load(file)
+# Load the preprocessor object from the file
+with open('preprocessor.pkl', 'rb') as file:
+    preprocessor = pickle.load(file)
+# Load the model object from the file
+with open('best_model.pkl', 'rb') as file:
+    model = pickle.load(file)
 def make_prediction(input_features):
     # Assuming input_features is a DataFrame with the correct structure

preprocessor.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:52dec9ecf963a4d8a050f58ee02438db62121bb9a71419e85d067b0957429f0b
-size 4979

 version https://git-lfs.github.com/spec/v1
+oid sha256:ac46af803735f9b18167add1df884ad73ac9c35f7fc0e2784cc6b3e389145113
+size 2779

transformers.py CHANGED Viewed

@@ -1,10 +1,52 @@
 from sklearn.base import BaseEstimator, TransformerMixin
 class UnitPriceTransformer(BaseEstimator, TransformerMixin):
     def fit(self, X, y=None):
         return self
     def transform(self, X):
         X['unit_price'] = X['sales'] / X['quantity']
         return X
-# Include other custom transformers as needed

 from sklearn.base import BaseEstimator, TransformerMixin
+from sklearn.preprocessing import OneHotEncoder
+from sklearn.cluster import KMeans
+import pandas as pd
 class UnitPriceTransformer(BaseEstimator, TransformerMixin):
     def fit(self, X, y=None):
         return self
     def transform(self, X):
         X['unit_price'] = X['sales'] / X['quantity']
         return X
class KMeansAndLabelTransformer(BaseEstimator, TransformerMixin):
    """Cluster rows on ``unit_price`` and label each row with its cluster.

    ``transform`` adds a ``distinct_cluster_label`` column whose values have the
    form ``"<cluster>_<sub_category>"``.

    Args:
        n_clusters: Number of clusters passed to the underlying KMeans model.
    """

    def __init__(self, n_clusters=3):
        self.n_clusters = n_clusters
        self.kmeans = KMeans(n_clusters=n_clusters, random_state=42)

    def fit(self, X, y=None):
        """Fit the KMeans model on the ``unit_price`` column and return ``self``."""
        # set_params(n_clusters=...) only updates the attribute on this object, so
        # push the current value into the model created in __init__ before fitting.
        self.kmeans.set_params(n_clusters=self.n_clusters)
        # Double brackets keep the single feature two-dimensional
        self.kmeans.fit(X[['unit_price']])
        return self

    def transform(self, X):
        """Return a copy of ``X`` with the ``distinct_cluster_label`` column added.

        Args:
            X: Frame providing the ``unit_price`` and ``sub_category`` columns.

        Returns:
            A new frame; the frame passed in by the caller is not modified.
        """
        # Predict the cluster labels
        cluster_labels = self.kmeans.predict(X[['unit_price']])
        X = X.copy()  # Avoid SettingWithCopyWarning
        # Build the label in a single pass over the two aligned sequences; both
        # parts go through str() so non-string sub-categories concatenate safely.
        X['distinct_cluster_label'] = [
            str(label) + "_" + str(sub_category)
            for label, sub_category in zip(cluster_labels, X['sub_category'])
        ]
        return X
class DynamicOneHotEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode ``distinct_cluster_label`` and drop the columns it replaces."""

    def fit(self, X, y=None):
        """Fit a OneHotEncoder on the ``distinct_cluster_label`` column.

        Categories not seen here are ignored at transform time
        (``handle_unknown='ignore'``).
        """
        self.encoder = OneHotEncoder(handle_unknown='ignore')
        self.encoder.fit(X[['distinct_cluster_label']])
        return self

    def transform(self, X):
        """Return ``X`` with the label column replaced by its one-hot columns.

        Args:
            X: Frame providing the ``distinct_cluster_label``, ``sub_category``
                and ``unit_price`` columns.

        Returns:
            A new frame, indexed from 0, holding the remaining columns of ``X``
            followed by the one-hot columns. The frame passed in by the caller
            keeps its own index and columns.
        """
        encoded_features = self.encoder.transform(X[['distinct_cluster_label']]).toarray()
        encoded_df = pd.DataFrame(
            encoded_features,
            columns=self.encoder.get_feature_names_out(['distinct_cluster_label']),
        )
        # encoded_df is indexed 0..n-1, so the input needs the same index for the
        # column-wise concat to line up; reset a copy, not the caller's frame.
        aligned = X.reset_index(drop=True)
        result = pd.concat([aligned, encoded_df], axis=1)
        # Drop original columns if not needed
        return result.drop(columns=['distinct_cluster_label', 'sub_category', 'unit_price'])