# NOTE: page chrome captured together with this file, kept here as comments:
#   ikoghoemmanuell's picture | Update app.py | 7811d4b | raw | history blame | 4.66 kB
import streamlit as st
import pandas as pd
import numpy as np
import pickle
import requests
from io import BytesIO
from PIL import Image
# Create the app
st.set_page_config(page_title='Sales Prediction App', page_icon=':bar_chart:', layout='wide')
st.title('Sales Prediction App')

# URL of the banner image shown under the title.
image_url = "https://cdn-images-1.medium.com/v2/resize:fit:800/1*npSOkWjHdKXMSiLJth1UhQ.png"

# Fetch and display the banner. The image is decorative, so a network or
# decoding failure is reported as a warning instead of crashing the app, and
# the timeout keeps an unresponsive host from hanging the script.
try:
    response = requests.get(image_url, timeout=10)
    response.raise_for_status()
    image = Image.open(BytesIO(response.content))
except (requests.RequestException, OSError) as exc:
    image = None
    st.warning(f'Could not load the banner image: {exc}')
else:
    st.image(image, caption='Image from URL')

# Load the data and keep every column except the 'sales' target.
data = pd.read_csv('merged_train_data.csv')
X = data.drop('sales', axis=1)

# Load the model, encoder and scaler. Context managers make sure each file
# handle is closed as soon as its object has been unpickled.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# deploy artifacts that come from a trusted source.
with open("model.pkl", "rb") as model_file:
    model = pickle.load(model_file)
with open("encoder.pkl", "rb") as encoder_file:
    encoder = pickle.load(encoder_file)
with open("scaler.pkl", "rb") as scaler_file:
    scaler = pickle.load(scaler_file)
# Define the function to make predictions
def _to_dense_array(part):
    """Return a transformer output (ndarray, DataFrame or sparse matrix) as a dense ndarray."""
    if hasattr(part, 'toarray'):
        # Sparse matrices expose toarray(); densify before stacking.
        part = part.toarray()
    return np.asarray(part)


def predict_sales(input_data, input_df):
    """Predict sales for the feature row held in ``input_df``.

    Numeric columns are scaled with the module-level ``scaler`` and the
    categorical columns are encoded with the module-level ``encoder``. The two
    parts are joined (scaled numeric first, then encoded categorical) and
    passed to ``model.predict`` as a single row.

    Args:
        input_data: Unused; kept so existing callers keep working. The
            features are always rebuilt from ``input_df``.
        input_df: DataFrame with the model features, including the
            categorical columns named in ``categoric_columns``.

    Returns:
        The value returned by ``model.predict`` for the assembled row.
    """
    # defining categories and numeric columns
    categoric_columns = ['family', 'city', 'state', 'type_y', 'type_x']
    numeric_columns = [col for col in input_df.columns if col not in categoric_columns]

    # Apply the already-fitted scaler. Re-fitting it here would replace the
    # training statistics with those of this single request row.
    scaled_num = scaler.transform(input_df[numeric_columns])
    encoded_cat = encoder.transform(input_df[categoric_columns])

    # pd.concat only accepts Series/DataFrame objects, while the transformers
    # may return ndarrays or a sparse matrix, so stack dense arrays instead.
    features = np.hstack([_to_dense_array(scaled_num), _to_dense_array(encoded_cat)])

    # Flatten and reshape so the model always receives one 2D row.
    prediction = model.predict(features.flatten().reshape(1, -1))
    return prediction
# introduction
st.write("""
This app predicts the sales amount for a given store and date based on various input features.
Please enter the required information and click on 'Predict' to get the predicted sales amount.
""")


def _unique_options(column):
    """Return the distinct values of ``X[column]`` as a list."""
    return list(X[column].unique())


# Widgets are grouped into sections of related parameters.
st.write('## Store Information')
store_nbr = st.selectbox('Store Number', _unique_options('store_nbr'))
cluster = st.slider('Cluster', 1, 17)
city = st.selectbox('City', _unique_options('city'))
state = st.selectbox('State', _unique_options('state'))

st.write('## Product Information')
family = st.selectbox('Family', _unique_options('family'))
type_x = st.radio('Type X', _unique_options('type_x'))
type_y = st.radio('Type Y', _unique_options('type_y'))
onpromotion = st.selectbox('On Promotion', [True, False])
oil_price = st.number_input("Enter oil price", format="%.5f")

st.write('## Date Information')
# Only the raw date is collected here; the calendar features are derived from
# it later by getDateFeatures.
date = st.date_input("Pick a date")

# Gather the widget values into a one-row DataFrame (key order is preserved).
input_dict = dict(
    store_nbr=store_nbr,
    cluster=cluster,
    city=city,
    state=state,
    family=family,
    type_x=type_x,
    type_y=type_y,
    onpromotion=onpromotion,
    oil_price=oil_price,
    date=date,
)
input_df = pd.DataFrame([input_dict])
def getDateFeatures(df: pd.DataFrame) -> pd.DataFrame:
    """Add calendar-derived feature columns to ``df`` based on its 'date' column.

    The frame is modified in place and also returned. 'date' is converted with
    ``pd.to_datetime(..., errors='coerce')``, so unparseable values become NaT.
    New columns are appended in a fixed order, which callers that rely on
    positional column order depend on.

    Args:
        df: DataFrame containing a 'date' column.

    Returns:
        The same DataFrame with the added feature columns.
    """
    df['date'] = pd.to_datetime(df['date'], errors='coerce')
    dates = df['date'].dt

    df['month'] = dates.month
    df['day_of_month'] = dates.day
    df['day_of_year'] = dates.dayofyear
    df['week_of_year'] = dates.isocalendar().week
    df['week_of_year'] = df['week_of_year'].astype(float)
    df['day_of_week'] = dates.dayofweek
    df['year'] = dates.year
    # dayofweek counts Monday as 0, so values above 4 are Saturday/Sunday.
    df["is_weekend"] = np.where(df['day_of_week'] > 4, 1, 0)
    df['is_month_start'] = dates.is_month_start.astype(int)
    df['quarter'] = dates.quarter
    df['is_month_end'] = dates.is_month_end.astype(int)
    df['is_quarter_start'] = dates.is_quarter_start.astype(int)
    df['is_quarter_end'] = dates.is_quarter_end.astype(int)
    df['is_year_start'] = dates.is_year_start.astype(int)
    df['is_year_end'] = dates.is_year_end.astype(int)

    # Season code: 0 for Dec-Feb, 2 for Jun-Aug, 3 for Sep-Nov, 1 otherwise.
    # The result is assigned as a plain ndarray (positional) rather than a new
    # Series, whose fresh 0..n-1 index would be aligned against df's index and
    # yield NaN for any frame that is not indexed 0..n-1.
    month = df['month']
    season = np.where(month.isin([12, 1, 2]), 0, 1)
    season = np.where(month.isin([6, 7, 8]), 2, season)
    season = np.where(month.isin([9, 10, 11]), 3, season)
    df["season"] = season.astype("int8")

    # Flag the 15th and the last day of each month.
    df['pay_day'] = np.where((df['day_of_month'] == 15) | (df['is_month_end'] == 1), 1, 0)

    # Flag dates that fall within 2016-04-16 .. 2016-12-31 (inclusive).
    earthquake_days = pd.date_range(start='2016-04-16', end='2016-12-31', freq='D')
    df['earthquake_impact'] = np.where(df['date'].isin(earthquake_days), 1, 0)
    return df
# Derive the calendar features from the picked date, then discard the raw
# 'date' column so only model features remain.
input_df = getDateFeatures(input_df).drop(columns=['date'])

# Run the model only when the user presses the button, and report the result.
if st.button('Predict'):
    prediction = predict_sales(input_df.values, input_df)
    rounded_sales = round(prediction[0], 2)
    st.success('The predicted sales amount is $' + str(rounded_sales))