Spaces:

7sugiwa
/

profitboost

Sleeping

App Files Community

7sugiwa committed on Feb 8

Commit

66191ca

•

1 Parent(s): ad102b7

Upload 2 files

Browse files

Files changed (2) hide show

app.py +10 -31
prediction.py +1 -62

app.py CHANGED Viewed

@@ -39,37 +39,16 @@ elif selection == "EDA":
 elif selection == "Make a Prediction":
     st.title("Make a Sales Prediction")
     with st.form("input_form"):
-        # Capture all inputs as per the original dataset
-        order_date = st.date_input('Order Date', datetime.now())
-        ship_date = st.date_input('Ship Date', datetime.now())  # Assume shipping the next day
-        ship_mode = st.selectbox('Ship Mode', ['First Class', 'Second Class', 'Standard Class', 'Same Day'], index=2)  # Default to Standard Class
-        segment = st.selectbox('Segment', ['Consumer', 'Corporate', 'Home Office'], index=0)  # Default to Consumer
-        country = st.text_input('Country', value='United States')
-        city = st.text_input('City', value='Los Angeles')  # Example city
-        state = st.text_input('State', value='California')  # Example state
-        postal_code = st.text_input('Postal Code', value='90001')  # Example postal code
-        region = st.selectbox('Region', ['South', 'West', 'Central', 'East'], index=1)  # Default to West
-        category = st.selectbox('Category', ['Furniture', 'Office Supplies', 'Technology'], index=1)  # Default to Office Supplies
-        sub_category = st.selectbox('Sub-Category', ['Bookcases', 'Chairs', 'Labels', 'Tables', 'Storage', 'Furnishings', 'Art', 'Phones', 'Binders', 'Appliances', 'Paper', 'Accessories', 'Envelopes', 'Fasteners', 'Supplies', 'Machines', 'Copiers'], index=10)  # Default to Paper
-        product_name = st.text_input('Product Name', value='Staple papers')  # Example product
-        sales = st.number_input('Sales', value=100.0, format="%.2f")  # Example sales amount
-        quantity = st.number_input('Quantity', value=2, format="%d")  # Example quantity
-        discount = st.number_input('Discount', value=0.0, format="%.2f")  # Example discount
-        submit_button = st.form_submit_button("Predict")
-    if submit_button:
-        # Construct the input DataFrame
-        input_features = pd.DataFrame([[ sales, discount, quantity, sub_category
-        ]], columns=[
-            'Sales', 'Discount', 'Quantity', 'Sub-Category'
-        ])
-        predicted_profit = make_prediction(input_features)  # Adjust this line as necessary
-        st.write(f'Predicted Profit: {predicted_profit:.2f}')

 elif selection == "Make a Prediction":
     st.title("Make a Sales Prediction")
     with st.form("input_form"):
+        # Simplify input fields based on what's actually used
+        sales = st.number_input('Sales', value=100.0, format="%.2f")
+        quantity = st.number_input('Quantity', value=2, format="%d")
+        discount = st.number_input('Discount', value=0.0, format="%.2f")
+        sub_category = st.selectbox('Sub-Category', ['Bookcases', 'Chairs', 'Labels', 'Tables', 'Storage', 'Furnishings', 'Art', 'Phones', 'Binders', 'Appliances', 'Paper', 'Accessories', 'Envelopes', 'Fasteners', 'Supplies', 'Machines', 'Copiers'])
+        submitted = st.form_submit_button("Predict")
+        if submitted:
+            input_features = pd.DataFrame([[sales, quantity, discount, sub_category]], columns=['sales', 'quantity', 'discount', 'sub_category'])
+            predicted_profit = make_prediction(input_features)
+            st.write(f'Predicted Profit: {predicted_profit:.2f}')

prediction.py CHANGED Viewed

@@ -1,73 +1,12 @@
 import joblib
 import pandas as pd
-from sklearn.base import BaseEstimator, TransformerMixin
-from sklearn.preprocessing import OneHotEncoder
-from sklearn.cluster import KMeans
-from sklearn.pipeline import Pipeline
-from sklearn.compose import ColumnTransformer
-from sklearn.preprocessing import StandardScaler, OneHotEncoder
-# Custom Transformer: UnitPriceTransformer
-class UnitPriceTransformer(BaseEstimator, TransformerMixin):
-    def fit(self, X, y=None):
-        return self
-    def transform(self, X):
-        X = X.copy()  # Work on a copy to avoid SettingWithCopyWarning
-        X['unit_price'] = X['sales'] / X['quantity']
-        return X[['unit_price']]
-# Custom Transformer: KMeansAndLabelTransformer
-class KMeansAndLabelTransformer(BaseEstimator, TransformerMixin):
-    def __init__(self, n_clusters=3):
-        self.n_clusters = n_clusters
-        self.kmeans = KMeans(n_clusters=n_clusters, random_state=42)
-    def fit(self, X, y=None):
-        self.kmeans.fit(X[['unit_price']])
-        return self
-    def transform(self, X):
-        X = X.copy()  # Work on a copy to avoid SettingWithCopyWarning
-        cluster_labels = self.kmeans.predict(X[['unit_price']])
-        X['distinct_cluster_label'] = cluster_labels.astype(str) + "_" + X['sub_category']
-        return X[['distinct_cluster_label']]
-# Custom Transformer: DynamicOneHotEncoder
-class DynamicOneHotEncoder(BaseEstimator, TransformerMixin):
-    def __init__(self):
-        self.encoder = OneHotEncoder(handle_unknown='ignore')
-    def fit(self, X, y=None):
-        self.encoder.fit(X[['distinct_cluster_label']])
-        return self
-    def transform(self, X):
-        X = X.copy()  # Work on a copy to avoid SettingWithCopyWarning
-        encoded_features = self.encoder.transform(X[['distinct_cluster_label']]).toarray()
-        # Create a DataFrame with the encoded features
-        encoded_df = pd.DataFrame(encoded_features, columns=self.encoder.get_feature_names_out(['distinct_cluster_label']))
-        return encoded_df
 # Load the pipeline and model
 pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
 model = joblib.load('best_model.pkl')
-preprocessor = joblib.load('preprocessor.pkl')
 def make_prediction(input_features):
-    preprocessor = ColumnTransformer(
-    transformers=[
-        ('num', StandardScaler(), ['sales', 'quantity', 'discount']),  # Adjust as necessary
-    ],
-    remainder='passthrough'
-)
-    pipeline = Pipeline(steps=[
-    ('unit_price', UnitPriceTransformer()),  # Calculate 'unit_price'
-    ('kmeans_label', KMeansAndLabelTransformer(n_clusters=3)),  # Apply KMeans and generate 'distinct_cluster_label'
-    ('dynamic_ohe', DynamicOneHotEncoder()),  # Dynamically encode 'distinct_cluster_label'
-    ('preprocessor', preprocessor),  # Apply standard preprocessing
-])
     processed_features = pipeline.transform(input_features)
     prediction = model.predict(processed_features)
     return prediction[0]

 import joblib
 import pandas as pd
 # Load the pipeline and model
 pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
 model = joblib.load('best_model.pkl')
 def make_prediction(input_features):
+    # Assuming input_features is a DataFrame with the correct structure
     processed_features = pipeline.transform(input_features)
     prediction = model.predict(processed_features)
     return prediction[0]