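# Streamlit app: flag inactive customers in uploaded transaction data by
# fitting a BG/NBD model (via the `lifetimes` package) and predicting
# future purchases per customer.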
import streamlit as st
import pandas as pd
import openpyxl
import lifetimes
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
import warnings
warnings.filterwarnings('ignore')
st.set_page_config(page_title='Detect Inactive Records')
st.title('Detect Inactive Records')
st.subheader('Upload your Excel file')
uploaded_file = st.file_uploader('Choose a XLSX file', type='xlsx')
if uploaded_file is not None:
    st.markdown('---')

    # Load the uploaded workbook once and cache it so Streamlit reruns are cheap
    @st.cache_data
    def load_excel(file1):
        df = pd.read_excel(file1, engine='openpyxl', parse_dates=['InvoiceDate'])
        return df

    data = load_excel(uploaded_file)

    st.subheader('Data Preview')
    st.dataframe(data.head(20))
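    # Assumed input schema (Online-Retail style): the sheet must contain the
    # columns CustomerID, InvoiceNo, InvoiceDate, Quantity and UnitPrice.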
    # Feature selection
    features = ['CustomerID', 'InvoiceNo', 'InvoiceDate', 'Quantity', 'UnitPrice']
    data_clv = data[features].copy()
    data_clv['TotalSales'] = data_clv['Quantity'].multiply(data_clv['UnitPrice'])

    # Check for missing values, then drop rows without a CustomerID
    missing = pd.DataFrame(zip(data_clv.isnull().sum(), data_clv.isnull().sum() / len(data_clv)),
                           columns=['Count', 'Proportion'], index=data_clv.columns)
    data_clv = data_clv[pd.notnull(data_clv['CustomerID'])]

    # Drop returns/cancellations (zero or negative sales values)
    data_clv = data_clv[data_clv['TotalSales'] > 0]
    # Build the per-customer RFM summary (frequency, recency, T, monetary_value)
    summary = lifetimes.utils.summary_data_from_transaction_data(
        data_clv, 'CustomerID', 'InvoiceDate', monetary_value_col='TotalSales')
    summary = summary.reset_index()
    # Render the repeat-purchase frequency histogram in the app
    fig_hist, ax_hist = plt.subplots()
    summary['frequency'].plot(kind='hist', bins=50, ax=ax_hist)
    st.pyplot(fig_hist)
    one_time_buyers = round(sum(summary['frequency'] == 0) / float(len(summary)) * 100, 2)
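    # Hypothetical UI output: surface the one-time-buyer share computed above
    st.write('One-time buyers:', one_time_buyers, '% of all customers')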
    # Fit the BG/NBD model to frequency/recency/T
    bgf = lifetimes.BetaGeoFitter(penalizer_coef=0.0)
    bgf.fit(summary['frequency'], summary['recency'], summary['T'])
    bgf_coefficient = bgf.summary  # fitted r, alpha, a, b coefficients

    # Probability that each customer is still "alive" (not churned)
    summary['probability_alive'] = bgf.conditional_probability_alive(
        summary['frequency'], summary['recency'], summary['T'])
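    # Under BG/NBD, a customer purchases at a Poisson rate while alive and may
    # drop out after any purchase, so a long gap since the last purchase
    # (low recency relative to T) pushes probability_alive toward zero.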
    # Predict the number of transactions over the next 300 days from history
    t = 300
    summary['Predicted No. of Transaction'] = round(
        bgf.conditional_expected_number_of_purchases_up_to_time(
            t, summary['frequency'], summary['recency'], summary['T']), 2)
    top_predicted = summary.sort_values(by='Predicted No. of Transaction',
                                        ascending=False).head(10).reset_index(drop=True)
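    # Hypothetical UI output: show the ten customers with the highest
    # predicted transaction counts
    st.dataframe(top_predicted)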
    # Hidden trends: predicted-transaction counts and their relation to P(alive)
    fig_counts, ax_counts = plt.subplots()
    sns.countplot(x='Predicted No. of Transaction', data=summary, ax=ax_counts)
    st.pyplot(fig_counts)
    fig_scatter, ax_scatter = plt.subplots()
    ax_scatter.scatter(summary['probability_alive'], summary['Predicted No. of Transaction'])
    st.pyplot(fig_scatter)
    summary_correlation = summary.corr()
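    # Hypothetical UI output: render the correlation matrix for inspection
    st.dataframe(summary_correlation)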
    # Label customers: at least one predicted transaction in the horizon => ACTIVE
    summary1 = summary.copy()
    summary1['Active/Inactive'] = summary1['Predicted No. of Transaction'].apply(
        lambda x: 'ACTIVE' if x >= 1 else 'INACTIVE')
    selector = st.selectbox('Select User ID', summary1['CustomerID'],
                            index=None, placeholder='Select Customer ID')
    summary2 = summary1[['CustomerID', 'Active/Inactive']]
    if selector is not None:
        selected = summary2.loc[summary2['CustomerID'] == selector, 'Active/Inactive'].iloc[0]
        st.write('STATUS:', selected)
    # Enrich the output with per-customer average quantity and sales value
    trends = data_clv.groupby('CustomerID')['Quantity'].mean().reset_index()
    trends1 = data_clv.groupby('CustomerID')['TotalSales'].mean().reset_index()
    summary1 = summary1.merge(trends, how='left', on='CustomerID')
    summary1 = summary1.merge(trends1, how='left', on='CustomerID')
    summary1.drop('probability_alive', axis=1, inplace=True)
    summary1.rename(columns={'Quantity': 'Average Quantity', 'TotalSales': 'Average Sales Value'},
                    inplace=True)

    # Offer the labeled table as a CSV download
    out = summary1.to_csv(index=False).encode('utf-8')
    st.download_button(label='DOWNLOAD RESULT', data=out, file_name='CLV_OUTPUT.csv', mime='text/csv')
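# To run the app locally (hypothetical filename):
#   streamlit run detect_inactive.py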