# Import necessary libraries
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import joblib
import base64
from sklearn.metrics.pairwise import cosine_similarity
import re
from PIL import Image
# Load the trained Random Forest model and TF-IDF vectorizer.
# Both are read from pickle files addressed by relative path, so the script
# must be started from the directory that contains them. They are used as
# module-level globals by preprocess_data() and predict_and_score().
# NOTE(review): joblib.load unpickles arbitrary Python objects -- only load
# model files that come from a trusted source.
rf_classifier = joblib.load('random_forest_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')
# Images rendered in the sidebar by display_sidebar().
image1 = Image.open('image1.PNG')
logo = Image.open('logo.png')
# Markup injected into the page via st.markdown below. The string currently
# holds only a newline, so nothing visible is injected.
# NOTE(review): presumably meant to carry CSS that hides Streamlit's default
# chrome -- confirm whether the content was lost.
hide_streamlit_style = """
"""
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
def main():
    """Render the page: title, sidebar, and the upload-then-process flow.

    When a file has been uploaded it is handed to ``process_data``;
    otherwise an informational prompt is shown and nothing else happens.
    """
    st.title('Batch Product SKU Predictor')

    # Sidebar
    display_sidebar()

    # Main UI sections
    st.subheader('1. File Upload')
    upload = st.file_uploader(
        "Choose a CSV or Excel file. Make sure the number of rows is less than 20,000.",
        type=['csv', 'xlsx'],
    )

    # Nothing uploaded yet: prompt the user and stop here.
    if not upload:
        st.info("Please upload a CSV or Excel file to get started.")
        return

    st.success("File uploaded successfully!")
    st.subheader('2. Processing Data...')
    process_data(upload)
def display_sidebar():
    """Populate the sidebar with the logo, an 'About' blurb and usage steps.

    Reads the module-level ``logo`` and ``image1`` images. The example image
    is placed between instruction steps 2 and 3.
    """
    panel = st.sidebar

    panel.image(logo, width=250)
    panel.header('About')
    panel.text('This app predicts product SKUs based\non uploaded data.')

    panel.subheader('Instructions:')
    panel.text('1. Upload your data file.')
    panel.text('2. Make sure your column name is\n"Product Name".')
    # Example screenshot, captioned 'example'.
    panel.image(image1, caption='example')
    panel.text('3. Wait for processing.')
    panel.text('4. View and download the results.')
# Characters that are dropped outright before the SKU is assembled.
# Parentheses are removed as individual characters so that nested or
# unbalanced ones cannot survive into the SKU.
_SKU_STRIP_TABLE = str.maketrans('', '', '.@+()')


# Function to transform product names into SKU names
def transform_to_sku(product_name):
    """Build an upper-case, hyphen-separated SKU string from a product name.

    Steps, in order:
      1. Drop the characters ``. @ + ( )``.
      2. Insert a hyphen wherever a digit touches a letter (``500ml`` ->
         ``500-ml``, ``A4`` -> ``A-4``).
      3. Replace whitespace runs with a single hyphen and upper-case.
      4. Collapse runs of hyphens into one.

    Args:
        product_name: The raw product name. Anything that is not a ``str``
            (e.g. ``None`` or a NaN float) is treated as missing.

    Returns:
        The SKU string, or ``"UNKNOWN-SKU"`` when the input is not a string
        or when nothing is left after cleaning (empty / whitespace-only
        names).
    """
    if not isinstance(product_name, str):
        return "UNKNOWN-SKU"

    # Remove unwanted characters and every parenthesis in one pass.
    cleaned = product_name.translate(_SKU_STRIP_TABLE)

    # Insert hyphens between numbers and letters if there is no space.
    cleaned = re.sub(r'(\d+)([a-zA-Z])', r'\1-\2', cleaned)
    cleaned = re.sub(r'([a-zA-Z])(\d+)', r'\1-\2', cleaned)

    # Split on whitespace, join with hyphens, and convert to uppercase.
    sku_name = '-'.join(cleaned.split()).upper()

    # Collapse multiple hyphens into one (e.g. "foo - bar" -> "FOO-BAR").
    sku_name = re.sub(r'-{2,}', '-', sku_name)

    # A blank SKU is meaningless; use the same fallback as for missing input.
    return sku_name or "UNKNOWN-SKU"
def process_file_upload():
    """Show a file uploader and process the file once one is provided.

    While no file has been chosen, a waiting message is written instead.
    """
    upload = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'])

    # No file yet: report that we are waiting and do nothing else.
    if not upload:
        st.write("Awaiting file upload...")
        return

    st.write("File uploaded successfully. Processing...")
    process_data(upload)
def process_data(uploaded_file):
    """Run the full pipeline on an uploaded file and render the results.

    The pipeline is load -> vectorize -> predict/score -> display, with a
    progress bar advanced by 25% after each stage.

    Args:
        uploaded_file: The file object returned by ``st.file_uploader``.

    Any exception raised by a stage is caught here, because this is the UI
    boundary: the progress bar is cleared and the message is shown in an
    error box instead of crashing the page.
    """
    progress_bar = st.progress(0)
    try:
        data = load_data(uploaded_file)
        progress_bar.progress(25)
        product_vectors = preprocess_data(data)
        progress_bar.progress(50)
        data = predict_and_score(data, product_vectors)
        progress_bar.progress(75)
        display_results(data)
        progress_bar.progress(100)
    except Exception as e:
        # Remove the half-filled bar so it does not look like work is ongoing.
        progress_bar.empty()
        # st.error renders a proper error box; st.write has no `color` option.
        st.error(f"⚠️ An error occurred: {e}")
def load_data(uploaded_file):
    """Load the uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded_file: A file-like object with a ``name`` attribute, such as
            the object returned by ``st.file_uploader``.

    Returns:
        A ``pandas.DataFrame`` with the file's contents. Names ending in
        ``.csv`` (compared case-insensitively, so ``DATA.CSV`` works too) are
        parsed as CSV; everything else is handed to ``pandas.read_excel``.
    """
    # Compare in lower case: an upper-case extension is still a CSV file.
    if uploaded_file.name.lower().endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return pd.read_excel(uploaded_file)
def preprocess_data(data):
    """Clean the 'Product Name' column and return its TF-IDF vectors.

    The column is updated on ``data`` itself: missing values become empty
    strings and every remaining value is converted to ``str`` so that the
    vectorizer only ever sees text.

    Args:
        data: DataFrame that must contain a ``'Product Name'`` column.

    Returns:
        The result of ``vectorizer.transform`` on the cleaned column, using
        the module-level ``vectorizer``.

    Raises:
        ValueError: If ``data`` has no ``'Product Name'`` column.
    """
    if 'Product Name' not in data.columns:
        raise ValueError('The uploaded file must contain a "Product Name" column.')

    # Assign the cleaned column back instead of calling fillna(inplace=True)
    # on a column selection: the chained in-place form is deprecated in
    # pandas 2.x and does not update the frame under Copy-on-Write.
    data['Product Name'] = data['Product Name'].fillna("").astype(str)
    return vectorizer.transform(data['Product Name'])
def _rowwise_cosine(left, right):
    """Return the cosine similarity between matching rows of two matrices.

    Both arguments are expected to be SciPy sparse matrices of the same
    shape, as returned by ``vectorizer.transform``. Rows with zero norm
    get a similarity of 0.
    """
    dots = np.asarray(left.multiply(right).sum(axis=1), dtype=float).ravel()
    left_norms = np.sqrt(
        np.asarray(left.multiply(left).sum(axis=1), dtype=float).ravel()
    )
    right_norms = np.sqrt(
        np.asarray(right.multiply(right).sum(axis=1), dtype=float).ravel()
    )
    denominators = left_norms * right_norms
    scores = np.zeros_like(dots)
    # Divide only where the denominator is non-zero; other rows stay at 0.
    np.divide(dots, denominators, out=scores, where=denominators > 0)
    return scores


def predict_and_score(data, product_vectors):
    """Predict a SKU per row and derive a SKU suggestion from its similarity.

    For each row, the product name's vector is compared (cosine similarity)
    with the vector of the SKU predicted for that same row:

    * similarity == 0   -> 'Predicted SKU' is replaced by ``"-"``.
    * similarity <  0.5 -> 'SKU Suggestion' is a new SKU built from the
      product name with ``transform_to_sku``.
    * otherwise         -> 'SKU Suggestion' is ``"-"`` (no action needed).

    Args:
        data: DataFrame with a ``'Product Name'`` column. Modified in place.
        product_vectors: Sparse vectors for ``data['Product Name']``, one
            row per DataFrame row.

    Returns:
        The same ``data`` object with 'Predicted SKU' and 'SKU Suggestion'
        columns added.
    """
    data['Predicted SKU'] = rf_classifier.predict(product_vectors)
    predicted_sku_vectors = vectorizer.transform(data['Predicted SKU'].astype(str))

    # Only each row's similarity to its own prediction is needed, so compute
    # it row by row instead of building the full n x n similarity matrix.
    scores = _rowwise_cosine(product_vectors, predicted_sku_vectors)

    # Blank out predictions that share nothing with the product name.
    # A boolean mask is positional, so this works for any DataFrame index.
    data.loc[scores == 0, 'Predicted SKU'] = "-"

    # Propose a new SKU for weak matches; strong matches need no action.
    needs_new_sku = scores < 0.5
    data['SKU Suggestion'] = [
        transform_to_sku(name) if propose else '-'
        for name, propose in zip(data['Product Name'], needs_new_sku)
    ]
    return data
def display_results(data):
    """Show a preview of the processed data and a download link.

    Args:
        data: The processed DataFrame to preview and offer for download.

    A slider lets the user choose how many rows to preview (minimum 5,
    default 10). For frames with 5 rows or fewer no slider is shown and the
    whole frame is displayed, because a slider needs its minimum below its
    maximum and its default inside that range.
    """
    st.subheader('3. Predicted Results')

    min_preview = 5
    total_rows = len(data)
    if total_rows > min_preview:
        # Clamp the default so that files with fewer than 10 rows still work.
        num_rows = st.slider(
            "Select number of rows to view",
            min_preview,
            total_rows,
            min(10, total_rows),
        )
    else:
        num_rows = total_rows
    st.write(data.head(num_rows))

    st.subheader('4. Download Results')
    st.markdown(get_table_download_link(data), unsafe_allow_html=True)
def get_table_download_link(df):
    """Generate an HTML download link for the DataFrame as a CSV file.

    Args:
        df: The DataFrame to export (written without the index).

    Returns:
        An ``<a>`` element as a string. Its ``href`` is a ``data:`` URI that
        embeds the base64-encoded CSV, so it must be rendered with
        ``unsafe_allow_html=True`` to be clickable.
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    # Embed the payload in the link; without the anchor tag only plain text
    # is rendered and nothing can be downloaded.
    href = (
        f'<a href="data:text/csv;base64,{b64}" '
        f'download="predicted_skus.csv">Download CSV File</a>'
    )
    return href
# Script entry point: build the page only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()