7sugiwa committed on
Commit
66191ca
1 Parent(s): ad102b7

Upload 2 files

Browse files
Files changed (2) hide show
  1. app.py +10 -31
  2. prediction.py +1 -62
app.py CHANGED
@@ -39,37 +39,16 @@ elif selection == "EDA":
39
  elif selection == "Make a Prediction":
40
  st.title("Make a Sales Prediction")
41
  with st.form("input_form"):
 
 
 
 
 
42
 
43
- # Capture all inputs as per the original dataset
44
- order_date = st.date_input('Order Date', datetime.now())
45
- ship_date = st.date_input('Ship Date', datetime.now()) # Assume shipping the next day
46
- ship_mode = st.selectbox('Ship Mode', ['First Class', 'Second Class', 'Standard Class', 'Same Day'], index=2) # Default to Standard Class
47
- segment = st.selectbox('Segment', ['Consumer', 'Corporate', 'Home Office'], index=0) # Default to Consumer
48
- country = st.text_input('Country', value='United States')
49
- city = st.text_input('City', value='Los Angeles') # Example city
50
- state = st.text_input('State', value='California') # Example state
51
- postal_code = st.text_input('Postal Code', value='90001') # Example postal code
52
- region = st.selectbox('Region', ['South', 'West', 'Central', 'East'], index=1) # Default to West
53
- category = st.selectbox('Category', ['Furniture', 'Office Supplies', 'Technology'], index=1) # Default to Office Supplies
54
- sub_category = st.selectbox('Sub-Category', ['Bookcases', 'Chairs', 'Labels', 'Tables', 'Storage', 'Furnishings', 'Art', 'Phones', 'Binders', 'Appliances', 'Paper', 'Accessories', 'Envelopes', 'Fasteners', 'Supplies', 'Machines', 'Copiers'], index=10) # Default to Paper
55
- product_name = st.text_input('Product Name', value='Staple papers') # Example product
56
- sales = st.number_input('Sales', value=100.0, format="%.2f") # Example sales amount
57
- quantity = st.number_input('Quantity', value=2, format="%d") # Example quantity
58
- discount = st.number_input('Discount', value=0.0, format="%.2f") # Example discount
59
 
 
 
 
 
60
 
61
- submit_button = st.form_submit_button("Predict")
62
-
63
- if submit_button:
64
- # Construct the input DataFrame
65
- input_features = pd.DataFrame([[ sales, discount, quantity, sub_category
66
- ]], columns=[
67
- 'Sales', 'Discount', 'Quantity', 'Sub-Category'
68
- ])
69
-
70
-
71
-
72
- predicted_profit = make_prediction(input_features) # Adjust this line as necessary
73
-
74
-
75
- st.write(f'Predicted Profit: {predicted_profit:.2f}')
 
39
  elif selection == "Make a Prediction":
40
  st.title("Make a Sales Prediction")
41
  with st.form("input_form"):
42
+ # Simplify input fields based on what's actually used
43
+ sales = st.number_input('Sales', value=100.0, format="%.2f")
44
+ quantity = st.number_input('Quantity', value=2, format="%d")
45
+ discount = st.number_input('Discount', value=0.0, format="%.2f")
46
+ sub_category = st.selectbox('Sub-Category', ['Bookcases', 'Chairs', 'Labels', 'Tables', 'Storage', 'Furnishings', 'Art', 'Phones', 'Binders', 'Appliances', 'Paper', 'Accessories', 'Envelopes', 'Fasteners', 'Supplies', 'Machines', 'Copiers'])
47
 
48
+ submitted = st.form_submit_button("Predict")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
 
50
+ if submitted:
51
+ input_features = pd.DataFrame([[sales, quantity, discount, sub_category]], columns=['sales', 'quantity', 'discount', 'sub_category'])
52
+ predicted_profit = make_prediction(input_features)
53
+ st.write(f'Predicted Profit: {predicted_profit:.2f}')
54
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
prediction.py CHANGED
@@ -1,73 +1,12 @@
1
  import joblib
2
  import pandas as pd
3
- from sklearn.base import BaseEstimator, TransformerMixin
4
- from sklearn.preprocessing import OneHotEncoder
5
- from sklearn.cluster import KMeans
6
- from sklearn.pipeline import Pipeline
7
- from sklearn.compose import ColumnTransformer
8
- from sklearn.preprocessing import StandardScaler, OneHotEncoder
9
-
10
- # Custom Transformer: UnitPriceTransformer
11
- class UnitPriceTransformer(BaseEstimator, TransformerMixin):
12
- def fit(self, X, y=None):
13
- return self
14
-
15
- def transform(self, X):
16
- X = X.copy() # Work on a copy to avoid SettingWithCopyWarning
17
- X['unit_price'] = X['sales'] / X['quantity']
18
- return X[['unit_price']]
19
-
20
- # Custom Transformer: KMeansAndLabelTransformer
21
- class KMeansAndLabelTransformer(BaseEstimator, TransformerMixin):
22
- def __init__(self, n_clusters=3):
23
- self.n_clusters = n_clusters
24
- self.kmeans = KMeans(n_clusters=n_clusters, random_state=42)
25
-
26
- def fit(self, X, y=None):
27
- self.kmeans.fit(X[['unit_price']])
28
- return self
29
-
30
- def transform(self, X):
31
- X = X.copy() # Work on a copy to avoid SettingWithCopyWarning
32
- cluster_labels = self.kmeans.predict(X[['unit_price']])
33
- X['distinct_cluster_label'] = cluster_labels.astype(str) + "_" + X['sub_category']
34
- return X[['distinct_cluster_label']]
35
-
36
- # Custom Transformer: DynamicOneHotEncoder
37
- class DynamicOneHotEncoder(BaseEstimator, TransformerMixin):
38
- def __init__(self):
39
- self.encoder = OneHotEncoder(handle_unknown='ignore')
40
-
41
- def fit(self, X, y=None):
42
- self.encoder.fit(X[['distinct_cluster_label']])
43
- return self
44
-
45
- def transform(self, X):
46
- X = X.copy() # Work on a copy to avoid SettingWithCopyWarning
47
- encoded_features = self.encoder.transform(X[['distinct_cluster_label']]).toarray()
48
- # Create a DataFrame with the encoded features
49
- encoded_df = pd.DataFrame(encoded_features, columns=self.encoder.get_feature_names_out(['distinct_cluster_label']))
50
- return encoded_df
51
 
52
  # Load the pipeline and model
53
  pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
54
  model = joblib.load('best_model.pkl')
55
- preprocessor = joblib.load('preprocessor.pkl')
56
 
57
  def make_prediction(input_features):
58
- preprocessor = ColumnTransformer(
59
- transformers=[
60
- ('num', StandardScaler(), ['sales', 'quantity', 'discount']), # Adjust as necessary
61
- ],
62
- remainder='passthrough'
63
- )
64
- pipeline = Pipeline(steps=[
65
- ('unit_price', UnitPriceTransformer()), # Calculate 'unit_price'
66
- ('kmeans_label', KMeansAndLabelTransformer(n_clusters=3)), # Apply KMeans and generate 'distinct_cluster_label'
67
- ('dynamic_ohe', DynamicOneHotEncoder()), # Dynamically encode 'distinct_cluster_label'
68
- ('preprocessor', preprocessor), # Apply standard preprocessing
69
- ])
70
-
71
  processed_features = pipeline.transform(input_features)
72
  prediction = model.predict(processed_features)
73
  return prediction[0]
 
1
  import joblib
2
  import pandas as pd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3
 
4
  # Load the pipeline and model
5
  pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
6
  model = joblib.load('best_model.pkl')
 
7
 
8
  def make_prediction(input_features):
9
+ # Expects input_features to be a one-row DataFrame with the columns app.py passes: 'sales', 'quantity', 'discount', 'sub_category'
 
 
 
 
 
 
 
 
 
 
 
 
10
  processed_features = pipeline.transform(input_features)
11
  prediction = model.predict(processed_features)
12
  return prediction[0]