Spaces:
Sleeping
Sleeping
Upload 2 files
Browse files
- app.py +10 -31
- prediction.py +1 -62
app.py
CHANGED
@@ -39,37 +39,16 @@ elif selection == "EDA":
|
|
39 |
elif selection == "Make a Prediction":
|
40 |
st.title("Make a Sales Prediction")
|
41 |
with st.form("input_form"):
|
|
|
|
|
|
|
|
|
|
|
42 |
|
43 |
-
|
44 |
-
order_date = st.date_input('Order Date', datetime.now())
|
45 |
-
ship_date = st.date_input('Ship Date', datetime.now()) # Assume shipping the next day
|
46 |
-
ship_mode = st.selectbox('Ship Mode', ['First Class', 'Second Class', 'Standard Class', 'Same Day'], index=2) # Default to Standard Class
|
47 |
-
segment = st.selectbox('Segment', ['Consumer', 'Corporate', 'Home Office'], index=0) # Default to Consumer
|
48 |
-
country = st.text_input('Country', value='United States')
|
49 |
-
city = st.text_input('City', value='Los Angeles') # Example city
|
50 |
-
state = st.text_input('State', value='California') # Example state
|
51 |
-
postal_code = st.text_input('Postal Code', value='90001') # Example postal code
|
52 |
-
region = st.selectbox('Region', ['South', 'West', 'Central', 'East'], index=1) # Default to West
|
53 |
-
category = st.selectbox('Category', ['Furniture', 'Office Supplies', 'Technology'], index=1) # Default to Office Supplies
|
54 |
-
sub_category = st.selectbox('Sub-Category', ['Bookcases', 'Chairs', 'Labels', 'Tables', 'Storage', 'Furnishings', 'Art', 'Phones', 'Binders', 'Appliances', 'Paper', 'Accessories', 'Envelopes', 'Fasteners', 'Supplies', 'Machines', 'Copiers'], index=10) # Default to Paper
|
55 |
-
product_name = st.text_input('Product Name', value='Staple papers') # Example product
|
56 |
-
sales = st.number_input('Sales', value=100.0, format="%.2f") # Example sales amount
|
57 |
-
quantity = st.number_input('Quantity', value=2, format="%d") # Example quantity
|
58 |
-
discount = st.number_input('Discount', value=0.0, format="%.2f") # Example discount
|
59 |
|
|
|
|
|
|
|
|
|
60 |
|
61 |
-
submit_button = st.form_submit_button("Predict")
|
62 |
-
|
63 |
-
if submit_button:
|
64 |
-
# Construct the input DataFrame
|
65 |
-
input_features = pd.DataFrame([[ sales, discount, quantity, sub_category
|
66 |
-
]], columns=[
|
67 |
-
'Sales', 'Discount', 'Quantity', 'Sub-Category'
|
68 |
-
])
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
predicted_profit = make_prediction(input_features) # Adjust this line as necessary
|
73 |
-
|
74 |
-
|
75 |
-
st.write(f'Predicted Profit: {predicted_profit:.2f}')
|
|
|
39 |
elif selection == "Make a Prediction":
|
40 |
st.title("Make a Sales Prediction")
|
41 |
with st.form("input_form"):
|
42 |
+
# Simplify input fields based on what's actually used
|
43 |
+
sales = st.number_input('Sales', value=100.0, format="%.2f")
|
44 |
+
quantity = st.number_input('Quantity', value=2, format="%d")
|
45 |
+
discount = st.number_input('Discount', value=0.0, format="%.2f")
|
46 |
+
sub_category = st.selectbox('Sub-Category', ['Bookcases', 'Chairs', 'Labels', 'Tables', 'Storage', 'Furnishings', 'Art', 'Phones', 'Binders', 'Appliances', 'Paper', 'Accessories', 'Envelopes', 'Fasteners', 'Supplies', 'Machines', 'Copiers'])
|
47 |
|
48 |
+
submitted = st.form_submit_button("Predict")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
|
50 |
+
if submitted:
|
51 |
+
input_features = pd.DataFrame([[sales, quantity, discount, sub_category]], columns=['sales', 'quantity', 'discount', 'sub_category'])
|
52 |
+
predicted_profit = make_prediction(input_features)
|
53 |
+
st.write(f'Predicted Profit: {predicted_profit:.2f}')
|
54 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
prediction.py
CHANGED
@@ -1,73 +1,12 @@
|
|
1 |
import joblib
|
2 |
import pandas as pd
|
3 |
-
from sklearn.base import BaseEstimator, TransformerMixin
|
4 |
-
from sklearn.preprocessing import OneHotEncoder
|
5 |
-
from sklearn.cluster import KMeans
|
6 |
-
from sklearn.pipeline import Pipeline
|
7 |
-
from sklearn.compose import ColumnTransformer
|
8 |
-
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
9 |
-
|
10 |
-
# Custom Transformer: UnitPriceTransformer
|
11 |
-
class UnitPriceTransformer(BaseEstimator, TransformerMixin):
    """Stateless transformer that derives a per-unit price column.

    ``transform`` expects a DataFrame with ``sales`` and ``quantity`` columns
    and returns a one-column DataFrame named ``unit_price``; all other input
    columns are dropped from the output.
    """

    def fit(self, X, y=None):
        """Learn nothing and return ``self``."""
        return self

    def transform(self, X):
        """Return ``sales / quantity`` as a single ``unit_price`` column.

        The input frame is not modified: ``assign`` works on a copy.
        """
        with_price = X.assign(unit_price=X['sales'] / X['quantity'])
        return with_price[['unit_price']]
|
19 |
-
|
20 |
-
# Custom Transformer: KMeansAndLabelTransformer
|
21 |
-
class KMeansAndLabelTransformer(BaseEstimator, TransformerMixin):
    """Cluster rows on ``unit_price`` and tag each row with a combined label.

    The label has the form ``"<cluster id>_<sub_category>"`` and is returned
    as a one-column DataFrame named ``distinct_cluster_label``. The input to
    both ``fit`` and ``transform`` must contain a ``unit_price`` column;
    ``transform`` additionally needs ``sub_category``.

    Parameters
    ----------
    n_clusters : int, default 3
        Number of KMeans clusters.
    """

    def __init__(self, n_clusters=3):
        self.n_clusters = n_clusters
        # Fixed random_state keeps the cluster assignment reproducible.
        self.kmeans = KMeans(n_clusters=n_clusters, random_state=42)

    def fit(self, X, y=None):
        """Fit the KMeans model on the ``unit_price`` column and return ``self``."""
        self.kmeans.fit(X[['unit_price']])
        return self

    def transform(self, X):
        """Return the ``distinct_cluster_label`` column for each row of ``X``."""
        # Wrap the predicted ids in a Series before concatenating: adding a
        # str to a NumPy unicode array fails on NumPy 1.x, whereas pandas
        # string concatenation works on every version. Sharing X's index
        # keeps the result aligned row-for-row with the input.
        cluster_ids = pd.Series(
            self.kmeans.predict(X[['unit_price']]), index=X.index
        ).astype(str)
        labels = cluster_ids + "_" + X['sub_category']
        return labels.to_frame(name='distinct_cluster_label')
|
35 |
-
|
36 |
-
# Custom Transformer: DynamicOneHotEncoder
|
37 |
-
class DynamicOneHotEncoder(BaseEstimator, TransformerMixin):
    """One-hot encode the ``distinct_cluster_label`` column.

    Wraps a ``OneHotEncoder`` created with ``handle_unknown='ignore'`` and
    returns the encoded features as a dense DataFrame whose columns are named
    by the encoder's ``get_feature_names_out``.
    """

    def __init__(self):
        self.encoder = OneHotEncoder(handle_unknown='ignore')

    def fit(self, X, y=None):
        """Fit the encoder on ``distinct_cluster_label`` and return ``self``."""
        self.encoder.fit(X[['distinct_cluster_label']])
        return self

    def transform(self, X):
        """Return the one-hot encoded labels as a DataFrame indexed like ``X``."""
        # The encoder output is converted with .toarray(), i.e. it is
        # expected to be a sparse matrix.
        encoded_features = self.encoder.transform(X[['distinct_cluster_label']]).toarray()
        # Keep the caller's row index so the encoded rows stay aligned with
        # the input rows instead of being renumbered from zero.
        return pd.DataFrame(
            encoded_features,
            columns=self.encoder.get_feature_names_out(['distinct_cluster_label']),
            index=X.index,
        )
|
51 |
|
52 |
# Load the pipeline and model
# NOTE(security): joblib.load unpickles arbitrary objects; only load artifact
# files that come from a trusted source.
pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
model = joblib.load('best_model.pkl')
preprocessor = joblib.load('preprocessor.pkl')


def make_prediction(input_features):
    """Return the model's prediction for the first row of ``input_features``.

    The features are run through the module-level ``pipeline`` loaded above
    and the result is passed to ``model.predict``. The previous body rebuilt
    a fresh, unfitted pipeline on every call (and contained an incomplete
    ``transformers=[...]`` fragment), which cannot be used with
    ``transform``; the already-loaded pipeline is used instead.

    Parameters
    ----------
    input_features : pandas.DataFrame
        Assumed to carry the columns the loaded pipeline was fitted on
        (TODO confirm against the training code).

    Returns
    -------
    The first element of the array returned by ``model.predict``.
    """
    processed_features = pipeline.transform(input_features)
    prediction = model.predict(processed_features)
    return prediction[0]
|
|
|
1 |
import joblib
|
2 |
import pandas as pd
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3 |
|
4 |
# Deserialize the preprocessing pipeline and the model once, at import time.
# NOTE(review): if the pickled pipeline contains custom transformer classes,
# those classes must be importable for joblib.load to succeed - confirm.
pipeline = joblib.load('full_pipeline_with_unit_price.pkl')
model = joblib.load('best_model.pkl')


def make_prediction(input_features):
    """Return the model's prediction for the first row of ``input_features``.

    ``input_features`` is assumed to be a DataFrame that already has the
    structure the loaded pipeline expects; no validation is performed here.
    """
    predictions = model.predict(pipeline.transform(input_features))
    return predictions[0]
|