aaronayitey committed on
Commit
5765cf7
1 Parent(s): 20223fb

Delete streamlit_app.py

Browse files
Files changed (1) hide show
  1. streamlit_app.py +0 -217
streamlit_app.py DELETED
@@ -1,217 +0,0 @@
1
- import streamlit as st
2
- import pandas as pd
3
- import seaborn as sns
4
- import matplotlib.pyplot as plt
5
-
6
-
7
- # Machine Learning Modeling
8
- from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
9
- from sklearn.compose import ColumnTransformer
10
- from sklearn.linear_model import LinearRegression
11
- from sklearn.model_selection import RandomizedSearchCV
12
- import xgboost as xgb
13
- from sklearn.metrics import mean_squared_error
14
- import joblib
15
-
16
# Page-wide settings: render the app across the full browser width.
st.set_page_config(layout="wide")

# Dataset shared by every view; remains None until a workbook is loaded.
df = None

# Sidebar navigation: the selected label decides which view is rendered.
_VIEW_LABELS = ["Data Statistics", "Visuals", "Time Series Analysis", "Forecasting"]
st.sidebar.title("Favorita Stores")
selected_option = st.sidebar.radio("Select to Proceed", _VIEW_LABELS)

# Stylesheet for the banner: centred white text on a blue bar that is
# declared sticky at the top of the page.
_TITLE_STYLE = """
<style>
.title-text {
font-size: 28px;
text-align: center;
background-color: #3498db;
color: white;
padding: 10px 0;
width: 100%;
position: sticky;
top: 0;
z-index: 1;
}
</style>
"""
st.markdown(_TITLE_STYLE, unsafe_allow_html=True)

# App title, rendered as raw HTML so it picks up the .title-text class.
_TITLE_HTML = '<p class="title-text">Machine Learning App for Sales Prediction</p>'
st.markdown(_TITLE_HTML, unsafe_allow_html=True)
46
-
47
- # Function to load and process the data
48
def load_and_process_data():
    """Prompt for an Excel workbook and load it into the module-level ``df``.

    Renders a Streamlit file uploader restricted to ``.xlsx`` files. When a
    workbook is supplied it is read with ``pandas.read_excel``, rows that
    contain null values are dropped, and the ``'Unnamed: 0'`` column is
    removed when it is present. The cleaned frame is stored in the global
    ``df`` so the rest of the script can use it.

    Returns:
        The cleaned DataFrame, or ``None`` when nothing was uploaded or the
        upload was rejected (the global ``df`` is left untouched in that case).
    """
    global df
    xlsx_mime = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'

    uploaded_file = st.file_uploader("Choose an Excel file", type=["xlsx"])
    if uploaded_file is None:
        return None

    # NOTE(review): the reported MIME type appears to be client-supplied and
    # may differ for a perfectly valid .xlsx file, so the file name is
    # accepted as evidence too instead of rejecting on MIME mismatch alone.
    looks_like_xlsx = (
        uploaded_file.type == xlsx_mime
        or uploaded_file.name.lower().endswith('.xlsx')
    )
    if not looks_like_xlsx:
        st.write("Please upload a valid Excel file.")
        return None

    # Read the workbook and discard every row that has a missing value.
    frame = pd.read_excel(uploaded_file).dropna()

    # errors='ignore': not every workbook carries the 'Unnamed: 0' column,
    # and its absence must not abort the load with a KeyError.
    df = frame.drop(columns='Unnamed: 0', errors='ignore')
    return df
62
-
63
# Choices offered by the forecasting form (order is the display order).
_PRODUCT_FAMILIES = [
    'CELEBRATION', 'CLEANING', 'DAIRY', 'DELI', 'EGGS', 'FROZEN FOODS',
    'GROCERY I', 'GROCERY II', 'HARDWARE', 'HOME AND KITCHEN I',
    'HOME AND KITCHEN II', 'HOME APPLIANCES', 'HOME CARE',
    'LADIESWEAR', 'LAWN AND GARDEN', 'LINGERIE', 'LIQUOR,WINE,BEER',
    'MAGAZINES', 'MEATS', 'PERSONAL CARE', 'PET SUPPLIES',
    'PLAYERS AND ELECTRONICS', 'POULTRY', 'PREPARED FOODS', 'PRODUCE',
    'SCHOOL AND OFFICE SUPPLIES', 'SEAFOOD', 'AUTOMOTIVE', 'BABY CARE',
    'BEAUTY', 'BEVERAGES', 'BOOKS', 'BREAD/BAKERY',
]
_CITIES = [
    'Quito', 'Cayambe', 'Latacunga', 'Riobamba', 'Ibarra',
    'Santo Domingo', 'Guaranda', 'Puyo', 'Ambato', 'Guayaquil',
    'Salinas', 'Daule', 'Babahoyo', 'Quevedo', 'Playas', 'Libertad',
    'Cuenca', 'Loja', 'Machala', 'Esmeraldas', 'Manta', 'El Carmen',
]
_HOLIDAY_TYPES = ['Holiday', 'Additional', 'Transfer', 'Event', 'Bridge']


def _show_data_statistics(data):
    """Render sample rows, summary statistics and unique values of a text column."""
    number_sample = st.number_input(
        "Enter sample size to display data", min_value=5, max_value=10, step=1, value=5
    )
    st.write("Sample data", data.head(number_sample))
    st.write("Summary Statistics of float/Integer columns", data.describe())

    object_columns = data.select_dtypes(include='object').columns.tolist()
    selected_column = st.selectbox(
        "Select column of Data Type Object to View Unique values", object_columns
    )
    # The selectbox yields nothing when there are no object columns.
    if selected_column:
        st.write("Unique values are", data[selected_column].unique())


def _show_visuals(data):
    """Render a bar chart of the ten largest sales totals for a chosen text column."""
    object_columns = data.select_dtypes(include='object').columns.tolist()
    selected_column = st.selectbox(
        "Select column of Data Type Object for Visualization", object_columns
    )
    if not selected_column:
        return

    data['date'] = pd.to_datetime(data['date'])  # Convert to datetime if applicable

    # Sort BEFORE truncating: taking head(10) first would keep the first ten
    # groups in key order rather than the ten with the highest totals.
    totals = data.groupby(selected_column)['sales'].sum()
    top_sales = totals.sort_values(ascending=False).head(10)

    fig, ax = plt.subplots(figsize=(15, 6))
    ax.bar(top_sales.index, top_sales.values)
    ax.set_xlabel(selected_column)
    ax.set_ylabel('Sales Count')
    ax.set_title(f'Top 10 Sales Count for {selected_column}')
    st.pyplot(fig)
    # Release the figure so reruns of the script do not accumulate open figures.
    plt.close(fig)


def _show_time_series(data):
    """Render total sales resampled to the period chosen by the user."""
    # Keep only 'sales' and index it by timestamp. The conversion is done here
    # because this view cannot rely on another view having converted 'date'.
    # NOTE(review): summing a frame that still holds the datetime column
    # appears to fail on newer pandas releases — confirm; dropping it avoids that.
    sales_by_date = data[['sales']].copy()
    sales_by_date.index = pd.to_datetime(data['date'])

    resample_method = st.selectbox("Select a resampling method", ['M', 'Q', 'Y'])
    if not resample_method:
        return

    # Summing straight to the target period gives the same totals as summing
    # per day first and then re-summing.
    resampled_data = sales_by_date.resample(resample_method).sum()

    fig, ax = plt.subplots(figsize=(15, 6))
    sns.lineplot(data=resampled_data, ax=ax)
    ax.set_ylabel('Sales')
    ax.set_title(f'Sales Time Series (Resampled by {resample_method})')
    st.pyplot(fig)
    plt.close(fig)


def _show_forecasting():
    """Collect the model inputs and display the predicted sales on request."""
    st.write("Please enter these inputs to predict sales. Thank you!")

    # Load the pre-trained model and the preprocessor fitted alongside it.
    model = joblib.load('./xgb_model.joblib')
    preprocessor = joblib.load('./preprocessor.joblib')

    # Two columns; widgets appear top-to-bottom in the order they are created
    # within each column.
    col1, col2 = st.columns(2)
    with col1:
        # NOTE(review): the date is collected but is not part of the frame
        # passed to the preprocessor below — confirm whether it should be.
        st.date_input("Enter Date")
        onpromotion = st.number_input("Enter Number for onpromotion", min_value=0, step=1)
        oil_prices = st.number_input("Enter oil price", min_value=1, step=1)
        sales_lag_1 = st.number_input("Enter Number for sales lag", min_value=0, step=1)
    with col2:
        family = st.selectbox("Select product family", _PRODUCT_FAMILIES)
        city = st.selectbox("Select city", _CITIES)
        holiday_type = st.selectbox("Select holiday type", _HOLIDAY_TYPES)
        moving_average = st.number_input("Enter Number for moving average", min_value=0, step=1)

    # Visual separator between the form and the result.
    st.markdown("<hr>", unsafe_allow_html=True)

    if not st.button("Predict"):
        return

    # Single-row frame with every feature column except "sales".
    input_df = pd.DataFrame({
        "family": [family],
        "onpromotion": [onpromotion],
        "city": [city],
        "oil_prices": [oil_prices],
        "holiday_type": [holiday_type],
        "sales_lag_1": [sales_lag_1],
        "moving_average": [moving_average],
    })

    prediction = model.predict(preprocessor.transform(input_df))

    # Work with the scalar: comparing the whole prediction array in an `if`
    # only happens to work while it holds exactly one element.
    predicted_sales = float(prediction[0])
    badge_color = "green" if predicted_sales >= 0 else "red"
    st.markdown(
        f'Predicted Value for sales: <span style="background-color: {badge_color}; padding: 2px 5px; border-radius: 5px;">${predicted_sales:,.2f}</span>',
        unsafe_allow_html=True
    )


# Load and process the data (renders the uploader and fills the global df).
load_and_process_data()

# Render the view picked in the sidebar. The data-driven views stay empty
# until a workbook has been uploaded; any other choice shows the forecast form.
if selected_option == "Data Statistics":
    if df is not None:
        _show_data_statistics(df)
elif selected_option == "Visuals":
    if df is not None:
        _show_visuals(df)
elif selected_option == "Time Series Analysis":
    if df is not None:
        _show_time_series(df)
else:
    _show_forecasting()