Spaces:
Runtime error
Runtime error
File size: 5,794 Bytes
bba5e41 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# Import necessary libraries
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import joblib
import base64
from sklearn.metrics.pairwise import cosine_similarity
import re
from PIL import Image
# Load the trained Random Forest model and the TF-IDF vectorizer from files
# in the working directory. Both run once at import time, so a missing file
# aborts the app before any UI is drawn.
# NOTE(review): joblib.load unpickles its input; only load artifacts that
# come from a trusted source.
rf_classifier = joblib.load('random_forest_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')
# Images shown in the sidebar (see display_sidebar).
image1 = Image.open('image1.PNG')
logo = Image.open('logo.png')
# CSS snippet that hides the elements matched by #MainMenu and footer.
hide_streamlit_style = """
<style>
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
</style>
"""
# unsafe_allow_html=True is required for the raw <style> tag to be rendered.
st.markdown(hide_streamlit_style, unsafe_allow_html=True)
def main():
    """Render the page: title, sidebar, upload widget, then the results.

    When no file has been uploaded yet, an informational hint is shown and
    nothing else happens; otherwise the upload is handed to ``process_data``.
    """
    st.title('Batch Product SKU Predictor')
    display_sidebar()

    st.subheader('1. File Upload')
    upload = st.file_uploader(
        "Choose a CSV or Excel file. Make sure the number of rows is less than 20,000.",
        type=['csv', 'xlsx'],
    )

    # Guard clause: nothing to process until the user provides a file.
    if not upload:
        st.info("Please upload a CSV or Excel file to get started.")
        return

    st.success("File uploaded successfully!")
    st.subheader('2. Processing Data...')
    process_data(upload)
def display_sidebar():
    """Populate the sidebar with the logo, an 'About' blurb and usage steps."""
    sidebar = st.sidebar

    sidebar.image(logo, width=250)
    sidebar.header('About')
    sidebar.text('This app predicts product SKUs based\non uploaded data.')

    sidebar.subheader('Instructions:')
    sidebar.text('1. Upload your data file.')
    sidebar.text('2. Make sure your column name is\n"Product Name".')
    # Second positional argument of image() is the caption.
    sidebar.image(image1, 'example')
    sidebar.text('3. Wait for processing.')
    sidebar.text('4. View and download the results.')
# Function to transform product names into SKU names
def transform_to_sku(product_name):
    """Derive an upper-case, hyphen-separated SKU from a product name.

    Steps applied to string input:
      1. Drop the characters ``.``, ``@`` and ``+``.
      2. Remove parentheses but keep the text inside them.
      3. Insert a hyphen at every digit/letter boundary (``12kg`` -> ``12-kg``).
      4. Join whitespace-separated words with hyphens and upper-case.
      5. Collapse runs of hyphens into a single hyphen.

    Args:
        product_name: The raw product name. Anything that is not a ``str``
            (e.g. NaN or ``None``) is treated as missing.

    Returns:
        The generated SKU, or ``"UNKNOWN-SKU"`` when the input is not a
        string or nothing is left after cleaning (empty / whitespace-only /
        punctuation-only names).
    """
    fallback = "UNKNOWN-SKU"
    if not isinstance(product_name, str):
        return fallback

    # Remove unwanted characters in a single pass.
    cleaned = product_name.translate(str.maketrans('', '', '.@+'))
    # Remove parentheses, keeping their content.
    cleaned = re.sub(r'\((.*?)\)', r'\1', cleaned)
    # Insert hyphens between numbers and letters if there is no space.
    cleaned = re.sub(r'(\d+)([a-zA-Z])', r'\1-\2', cleaned)
    cleaned = re.sub(r'([a-zA-Z])(\d+)', r'\1-\2', cleaned)
    # Split, join with hyphens, and convert to uppercase.
    sku_name = '-'.join(cleaned.split()).upper()
    # Collapse multiple hyphens into one.
    sku_name = re.sub(r'-{2,}', '-', sku_name)

    # Missing names are filled with "" upstream; an empty SKU is not a
    # usable suggestion, so report it the same way as non-string input.
    return sku_name or fallback
def process_file_upload():
    """Show a file uploader and process the file once one is provided."""
    upload = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'])

    # Guard clause: keep prompting until a file is available.
    if not upload:
        st.write("Awaiting file upload...")
        return

    st.write("File uploaded successfully. Processing...")
    process_data(upload)
def process_data(uploaded_file):
    """Run the full pipeline on an uploaded file and render the results.

    The pipeline is load -> vectorize -> predict/score -> display, with the
    progress bar advanced after each stage.

    Args:
        uploaded_file: The object returned by ``st.file_uploader``.

    Any exception raised by a stage is caught here (this is the UI
    boundary) and shown to the user as an error message instead of
    crashing the app.
    """
    progress_bar = st.progress(0)
    try:
        data = load_data(uploaded_file)
        progress_bar.progress(25)
        product_vectors = preprocess_data(data)
        progress_bar.progress(50)
        data = predict_and_score(data, product_vectors)
        progress_bar.progress(75)
        display_results(data)
        progress_bar.progress(100)
    except Exception as e:
        # Remove the partially filled bar so it does not suggest that work
        # is still in progress.
        progress_bar.empty()
        # st.write has no `color` option; st.error is the element meant for
        # error messages and renders them in the error style.
        st.error(f"⚠️ An error occurred: {e}")
def load_data(uploaded_file):
    """Load the uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded_file: A file-like object with a ``name`` attribute.

    Returns:
        A ``pandas.DataFrame`` read with ``read_csv`` when the file name
        ends in ``.csv`` (case-insensitive), otherwise with ``read_excel``.
    """
    # Compare case-insensitively so that e.g. "DATA.CSV" is not sent to
    # the Excel reader.
    if uploaded_file.name.lower().endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return pd.read_excel(uploaded_file)
def preprocess_data(data):
    """Fill missing product names and vectorize the 'Product Name' column.

    Args:
        data: DataFrame holding a ``'Product Name'`` column. The column is
            updated in ``data`` so that missing values become ``""``.

    Returns:
        The result of ``vectorizer.transform`` on the cleaned column.

    Raises:
        KeyError: If ``data`` has no ``'Product Name'`` column.
    """
    if 'Product Name' not in data.columns:
        raise KeyError('The uploaded file must contain a "Product Name" column.')
    # Assign the result back instead of calling fillna(inplace=True) on the
    # selected column: the chained in-place form is deprecated and does not
    # update the DataFrame under pandas Copy-on-Write.
    data['Product Name'] = data['Product Name'].fillna("")
    return vectorizer.transform(data['Product Name'])
def _rowwise_cosine(left, right):
    """Return the cosine similarity of row i of *left* with row i of *right*.

    Works for scipy sparse matrices (via their ``multiply`` method) and for
    dense arrays. Rows with zero norm on either side get a similarity of 0.
    """
    def _row_dot(a, b):
        product = a.multiply(b) if hasattr(a, "multiply") else np.multiply(a, b)
        # Sparse sums come back as an (n, 1) matrix; flatten to 1-D.
        return np.asarray(product.sum(axis=1), dtype=float).ravel()

    dots = _row_dot(left, right)
    norms = np.sqrt(_row_dot(left, left) * _row_dot(right, right))
    similarity = np.zeros_like(dots)
    np.divide(dots, norms, out=similarity, where=norms > 0)
    return similarity


def predict_and_score(data, product_vectors):
    """Predict a SKU per row and suggest a new SKU where the match is weak.

    Each product name's vector is compared (cosine similarity) with the
    vector of its own predicted SKU.

    Args:
        data: DataFrame with a ``'Product Name'`` column; modified in place.
        product_vectors: Vectorized product names, one row per row of
            ``data``.

    Returns:
        ``data`` with two added columns:
          * ``'Predicted SKU'`` - the classifier's prediction, or ``"-"``
            when its similarity to the product name is exactly 0.
          * ``'SKU Suggestion'`` - ``transform_to_sku(product name)`` when
            the similarity is below 0.5, otherwise ``"-"``.
    """
    predictions = rf_classifier.predict(product_vectors)
    predicted_sku_vectors = vectorizer.transform([str(sku) for sku in predictions])

    # Only the similarity of each row with its own prediction is needed, so
    # compute it row-wise instead of building the full n x n matrix.
    similarity = _rowwise_cosine(product_vectors, predicted_sku_vectors)

    # Positional masking: does not depend on the DataFrame's index labels.
    data['Predicted SKU'] = np.where(similarity == 0, "-", predictions)

    # Weak matches get a SKU proposal derived from the product name.
    data['SKU Suggestion'] = [
        transform_to_sku(name) if score < 0.5 else '-'
        for name, score in zip(data['Product Name'], similarity)
    ]
    return data
def display_results(data):
    """Show a preview of the processed data and a CSV download link.

    Args:
        data: The DataFrame to preview and offer for download.

    A slider lets the user choose how many rows to preview (minimum 5,
    default 10). For frames with 5 rows or fewer, every row is shown and
    no slider is drawn, because a slider needs min < max.
    """
    st.subheader('3. Predicted Results')

    min_rows = 5
    total_rows = len(data)
    if total_rows > min_rows:
        # Clamp the default so it never exceeds the slider's maximum.
        num_rows = st.slider(
            "Select number of rows to view",
            min_rows,
            total_rows,
            min(10, total_rows),
        )
    else:
        num_rows = total_rows
    st.write(data.head(num_rows))

    st.subheader('4. Download Results')
    st.markdown(get_table_download_link(data), unsafe_allow_html=True)
def get_table_download_link(df):
    """Build an HTML anchor that downloads *df* as ``predicted_data.csv``.

    The frame is serialized to CSV (without the index), base64-encoded and
    embedded in a ``data:`` URI, so no file is written anywhere.
    """
    csv_text = df.to_csv(index=False)
    b64 = base64.b64encode(csv_text.encode()).decode()
    return f'<a href="data:file/csv;base64,{b64}" download="predicted_data.csv">Download CSV File</a>'
# Start the app only when this file is run as a script, not when imported.
if __name__ == "__main__":
    main()
|