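# NOTE: the hosting page reported "Spaces: Runtime error" when this file was captured;
# that status text is page residue, not part of the program.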
# Import necessary libraries | |
import streamlit as st | |
import pandas as pd | |
from sklearn.feature_extraction.text import TfidfVectorizer | |
import numpy as np | |
import joblib | |
import base64 | |
from sklearn.metrics.pairwise import cosine_similarity | |
import re | |
from PIL import Image | |
# Artifacts loaded once at import time from files next to the script:
# the Random Forest classifier and the TF-IDF vectorizer it is used with.
rf_classifier = joblib.load('random_forest_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')

# Images shown in the sidebar (see display_sidebar).
image1 = Image.open('image1.PNG')
logo = Image.open('logo.png')

# CSS injected into the page to hide the elements matching #MainMenu and footer.
hide_streamlit_style = (
    "\n"
    "<style>\n"
    "#MainMenu {visibility: hidden;}\n"
    "footer {visibility: hidden;}\n"
    "</style>\n"
)
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
def main():
    """Render the page: title, sidebar, upload widget and, once a file is given, the results."""
    st.title('Batch Product SKU Predictor')

    # Sidebar
    display_sidebar()

    # Main UI sections
    st.subheader('1. File Upload')
    upload = st.file_uploader(
        "Choose a CSV or Excel file. Make sure the number of rows is less than 20,000.",
        type=['csv', 'xlsx'],
    )

    # Nothing uploaded yet: show the hint and stop here.
    if not upload:
        st.info("Please upload a CSV or Excel file to get started.")
        return

    st.success("File uploaded successfully!")
    st.subheader('2. Processing Data...')
    process_data(upload)
def display_sidebar():
    """Fill the sidebar with the logo, an 'About' blurb and the usage instructions."""
    sidebar = st.sidebar

    sidebar.image(logo, width=250)
    sidebar.header('About')
    sidebar.text('This app predicts product SKUs based\non uploaded data.')

    sidebar.subheader('Instructions:')
    sidebar.text('1. Upload your data file.')
    sidebar.text('2. Make sure your column name is\n"Product Name".')
    # Screenshot illustrating the expected column layout, captioned 'example'.
    sidebar.image(image1, caption='example')
    sidebar.text('3. Wait for processing.')
    sidebar.text('4. View and download the results.')
# Function to transform product names into SKU names | |
def transform_to_sku(product_name):
    """Turn a free-text product name into an upper-case, hyphen-separated SKU.

    Steps: drop '.', '@' and '+'; unwrap parenthesised text; put a hyphen
    between adjacent digits and letters; join whitespace-separated words with
    hyphens; upper-case; collapse runs of hyphens.

    Args:
        product_name: The product name. Anything that is not a ``str``
            (e.g. ``None`` or NaN) is treated as missing.

    Returns:
        The SKU string, or ``"UNKNOWN-SKU"`` when the input is not a string
        or contains nothing usable (empty, whitespace-only, or only removed
        characters).
    """
    if not isinstance(product_name, str):
        return "UNKNOWN-SKU"

    # Remove unwanted characters in a single pass.
    cleaned = product_name.translate(str.maketrans('', '', '.@+'))
    # Remove parentheses but keep their content.
    cleaned = re.sub(r'\((.*?)\)', r'\1', cleaned)
    # Insert hyphens between numbers and letters if there is no space.
    cleaned = re.sub(r'(\d+)([a-zA-Z])', r'\1-\2', cleaned)
    cleaned = re.sub(r'([a-zA-Z])(\d+)', r'\1-\2', cleaned)
    # Join words with hyphens, upper-case, then collapse repeated hyphens.
    sku_name = re.sub(r'-{2,}', '-', '-'.join(cleaned.split()).upper())

    # Missing names are filled with "" upstream; without this fallback they
    # would produce an empty SKU instead of the placeholder.
    return sku_name or "UNKNOWN-SKU"
def process_file_upload():
    """Show an upload widget and hand the uploaded file, if any, to process_data."""
    upload = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'])

    # No file yet: tell the user we are waiting and do nothing else.
    if not upload:
        st.write("Awaiting file upload...")
        return

    st.write("File uploaded successfully. Processing...")
    process_data(upload)
def process_data(uploaded_file):
    """Run the full pipeline on an uploaded file and render the results.

    The pipeline is load -> vectorize -> predict/score -> display, with a
    progress bar advanced after each stage. Any exception raised by a stage is
    caught here (this is the UI boundary) and shown to the user as an error
    message instead of a stack trace.

    Args:
        uploaded_file: The file object returned by the upload widget.
    """
    progress_bar = st.progress(0)
    try:
        data = load_data(uploaded_file)
        progress_bar.progress(25)

        product_vectors = preprocess_data(data)
        progress_bar.progress(50)

        data = predict_and_score(data, product_vectors)
        progress_bar.progress(75)

        display_results(data)
        progress_bar.progress(100)
    except Exception as e:
        # st.write has no `color` argument; st.error is the element meant
        # for rendering failures.
        st.error(f"⚠️ An error occurred: {e}")
def load_data(uploaded_file):
    """Load the uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded_file: A file-like object with a ``name`` attribute holding
            the original file name.

    Returns:
        A ``pandas.DataFrame`` with the file's contents. Files whose name ends
        in ``.csv`` (in any letter case) are parsed as CSV; everything else is
        handed to ``pandas.read_excel``.
    """
    # Compare case-insensitively so "DATA.CSV" is not mistaken for Excel.
    if uploaded_file.name.lower().endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return pd.read_excel(uploaded_file)
def preprocess_data(data):
    """Clean the 'Product Name' column and vectorize it.

    Missing product names are replaced with empty strings in ``data`` itself
    (the frame is modified in place), then the column is transformed with the
    module-level ``vectorizer``.

    Args:
        data: DataFrame that must contain a 'Product Name' column.

    Returns:
        Whatever ``vectorizer.transform`` returns for the product names
        (one row per row of ``data``).

    Raises:
        KeyError: If the 'Product Name' column is absent.
    """
    if 'Product Name' not in data.columns:
        # Same exception type as a plain column lookup, but with a message the
        # user can act on.
        raise KeyError('Required column "Product Name" was not found in the uploaded file.')

    # Assign the result back instead of `fillna(..., inplace=True)` on a
    # column selection, which is chained assignment and does not reliably
    # update the frame.
    data['Product Name'] = data['Product Name'].fillna("")

    # The text vectorizer expects strings; cast only for vectorization so the
    # original cell values stay untouched in the frame.
    return vectorizer.transform(data['Product Name'].astype(str))
def _row_dot(left, right):
    """Return the dot product of each row of ``left`` with the same row of ``right``."""
    if hasattr(left, "multiply"):
        # SciPy sparse input: element-wise product stays sparse.
        return np.asarray(left.multiply(right).sum(axis=1), dtype=float).ravel()
    if hasattr(right, "multiply"):
        return np.asarray(right.multiply(left).sum(axis=1), dtype=float).ravel()
    return np.sum(np.asarray(left, dtype=float) * np.asarray(right, dtype=float), axis=1)


def _rowwise_cosine_similarity(left, right):
    """Return cosine similarity between corresponding rows; all-zero rows score 0."""
    dots = _row_dot(left, right)
    norms = np.sqrt(_row_dot(left, left) * _row_dot(right, right))
    scores = np.zeros_like(dots)
    np.divide(dots, norms, out=scores, where=norms > 0)
    return scores


def predict_and_score(data, product_vectors):
    """Predict a SKU per row and derive a SKU suggestion from the similarity score.

    For every row the predicted SKU is vectorized with the module-level
    ``vectorizer`` and compared (cosine similarity) with that row's product
    vector:

    * similarity == 0   -> 'Predicted SKU' is replaced by "-"
    * similarity <  0.5 -> 'SKU Suggestion' is ``transform_to_sku(product name)``
    * otherwise         -> 'SKU Suggestion' is "-"

    Args:
        data: DataFrame with a 'Product Name' column; modified in place.
        product_vectors: Vectorized product names, one row per row of ``data``.

    Returns:
        The same DataFrame with 'Predicted SKU' and 'SKU Suggestion' columns.
    """
    data['Predicted SKU'] = rf_classifier.predict(product_vectors)
    predicted_sku_vectors = vectorizer.transform(data['Predicted SKU'].astype(str))

    # Only the row-i-vs-row-i similarity is needed, so compute it row-wise
    # instead of materialising the full n x n pairwise matrix.
    scores = _rowwise_cosine_similarity(product_vectors, predicted_sku_vectors)

    # Blank out predictions that share nothing with the product name. A
    # boolean mask is positional, so this does not depend on the index labels.
    no_overlap = scores == 0
    if no_overlap.any():
        data['Predicted SKU'] = data['Predicted SKU'].astype(object)
        data.loc[no_overlap, 'Predicted SKU'] = "-"

    # Low-similarity rows get a freshly generated SKU proposal; the rest need
    # no action and are marked with "-".
    data['SKU Suggestion'] = [
        transform_to_sku(name) if score < 0.5 else '-'
        for name, score in zip(data['Product Name'], scores)
    ]
    return data
def display_results(data):
    """Show a preview of the processed data and a CSV download link.

    Args:
        data: The processed DataFrame to preview and offer for download.
    """
    st.subheader('3. Predicted Results')

    # Show a preview of the data with an option to view all. The slider needs
    # min < max and a default inside that range, so small frames (which would
    # otherwise make st.slider raise) are shown in full without a slider.
    total_rows = len(data)
    min_preview = 5
    if total_rows > min_preview:
        num_rows = st.slider(
            "Select number of rows to view",
            min_preview,
            total_rows,
            min(10, total_rows),
        )
    else:
        num_rows = total_rows
    st.write(data.head(num_rows))

    st.subheader('4. Download Results')
    st.markdown(get_table_download_link(data), unsafe_allow_html=True)
def get_table_download_link(df):
    """Build an HTML anchor that downloads ``df`` as a CSV file.

    The CSV (without the index) is embedded in the link itself as a
    base64-encoded ``data:`` URI, so no server-side file is needed.

    Args:
        df: The DataFrame to export.

    Returns:
        An ``<a>`` element string whose download name is ``predicted_data.csv``.
    """
    csv_text = df.to_csv(index=False)
    payload = base64.b64encode(csv_text.encode()).decode()
    # "text/csv" is the registered media type for CSV; "file/csv" is not one.
    return (
        f'<a href="data:text/csv;base64,{payload}" '
        'download="predicted_data.csv">Download CSV File</a>'
    )
# Build the page only when this file is executed as a script, not when it is imported.
if __name__ == "__main__":
    main()