"""Streamlit app that batch-predicts product SKUs for an uploaded file.

The user uploads a CSV/Excel file with a "Product Name" column. A pre-trained
classifier predicts a SKU per row, the prediction is compared with the product
name via TF-IDF cosine similarity, and rows with a weak match get a proposed
new SKU derived from the product name.
"""

# Import necessary libraries
import base64
import re

import joblib
import numpy as np
import pandas as pd
import streamlit as st
from PIL import Image
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.preprocessing import normalize

# Load the trained Random Forest model and TF-IDF vectorizer
rf_classifier = joblib.load('random_forest_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')

image1 = Image.open('image1.PNG')
logo = Image.open('logo.png')

# NOTE(review): this style snippet is effectively blank; if it was meant to
# carry CSS that hides Streamlit chrome, that content is not present here.
hide_streamlit_style = """ """
st.markdown(hide_streamlit_style, unsafe_allow_html=True)

# Smallest and default number of rows offered by the preview slider.
_MIN_PREVIEW_ROWS = 5
_DEFAULT_PREVIEW_ROWS = 10

# Rows whose name/prediction similarity is below this get a proposed new SKU.
_SIMILARITY_THRESHOLD = 0.5


def main():
    """Render the page: title, sidebar, file upload and (once uploaded) results."""
    st.title('Batch Product SKU Predictor')

    # Sidebar
    display_sidebar()

    # Main UI sections
    st.subheader('1. File Upload')
    uploaded_file = st.file_uploader(
        "Choose a CSV or Excel file. Make sure the number of rows is less than 20,000.",
        type=['csv', 'xlsx'],
    )

    if uploaded_file:
        st.success("File uploaded successfully!")
        st.subheader('2. Processing Data...')
        process_data(uploaded_file)
    else:
        st.info("Please upload a CSV or Excel file to get started.")


def display_sidebar():
    """Displays information on the sidebar."""
    st.sidebar.image(logo, width=250)
    st.sidebar.header('About')
    st.sidebar.text('This app predicts product SKUs based\non uploaded data.')
    st.sidebar.subheader('Instructions:')
    st.sidebar.text('1. Upload your data file.')
    st.sidebar.text('2. Make sure your column name is\n"Product Name".')
    st.sidebar.image(image1, 'example')
    st.sidebar.text('3. Wait for processing.')
    st.sidebar.text('4. View and download the results.')


# Function to transform product names into SKU names
def transform_to_sku(product_name):
    """Derive an upper-case, hyphen-separated SKU string from a product name.

    Non-string input (e.g. a number or NaN) yields "UNKNOWN-SKU".
    For example, "Coke 500ml (Can)" becomes "COKE-500-ML-CAN".
    """
    if not isinstance(product_name, str):
        return "UNKNOWN-SKU"

    # Remove unwanted characters
    product_name = product_name.replace('.', '').replace('@', '').replace('+', '')
    # Drop the parentheses themselves but keep what they enclose
    product_name = re.sub(r'\((.*?)\)', r'\1', product_name)
    # Insert hyphens between numbers and letters if there is no space
    product_name = re.sub(r'(\d+)([a-zA-Z])', r'\1-\2', product_name)
    product_name = re.sub(r'([a-zA-Z])(\d+)', r'\1-\2', product_name)
    # Split on whitespace, join with hyphens, and convert to uppercase
    sku_name = '-'.join(product_name.split()).upper()
    # Collapse multiple hyphens into one
    return re.sub(r'-{2,}', '-', sku_name)


def process_file_upload():
    """Handles the file upload and processing."""
    uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'])

    if uploaded_file:
        st.write("File uploaded successfully. Processing...")
        process_data(uploaded_file)
    else:
        st.write("Awaiting file upload...")


def process_data(uploaded_file):
    """Processes the uploaded file and displays the results.

    Any failure in loading, vectorizing, predicting or rendering is reported
    to the user as an error message instead of propagating.
    """
    progress_bar = st.progress(0)

    try:
        data = load_data(uploaded_file)
        progress_bar.progress(25)

        product_vectors = preprocess_data(data)
        progress_bar.progress(50)

        data = predict_and_score(data, product_vectors)
        progress_bar.progress(75)

        display_results(data)
        progress_bar.progress(100)
    except Exception as e:  # UI boundary: show the problem rather than crash
        # st.error renders a highlighted error box; a ``color`` keyword on
        # st.write has no effect for plain strings.
        st.error(f"⚠️ An error occurred: {e}")


def load_data(uploaded_file):
    """Loads the uploaded CSV or Excel file into a DataFrame.

    The format is chosen from the file name: names ending in ".csv"
    (case-insensitive) are read as CSV, everything else as Excel.
    """
    if uploaded_file.name.lower().endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return pd.read_excel(uploaded_file)


def preprocess_data(data):
    """Preprocesses the data and returns product vectors.

    Missing product names are replaced by "" in ``data`` itself.

    Raises:
        ValueError: if ``data`` has no "Product Name" column.
    """
    if 'Product Name' not in data.columns:
        raise ValueError('The uploaded file must contain a "Product Name" column.')

    # Assign the result back: a chained ``fillna(..., inplace=True)`` does not
    # update the DataFrame when pandas Copy-on-Write is active.
    data['Product Name'] = data['Product Name'].fillna("")

    # Cast only the vectorizer input so stray numeric cells do not break
    # text vectorization; the stored column keeps its original values.
    return vectorizer.transform(data['Product Name'].astype(str))


def _rowwise_cosine_similarity(left, right):
    """Return cosine similarity between row i of ``left`` and row i of ``right``.

    Equivalent to the diagonal of ``cosine_similarity(left, right)`` without
    building the full n x n matrix. All-zero rows give a similarity of 0.
    """
    left_unit = normalize(left)
    right_unit = normalize(right)

    # SciPy sparse matrices expose an element-wise ``multiply`` that accepts
    # sparse or dense operands; plain arrays fall back to NumPy.
    if hasattr(left_unit, 'multiply'):
        products = left_unit.multiply(right_unit)
    elif hasattr(right_unit, 'multiply'):
        products = right_unit.multiply(left_unit)
    else:
        products = np.multiply(left_unit, right_unit)

    return np.asarray(products.sum(axis=1)).ravel()


def predict_and_score(data, product_vectors):
    """Predicts SKUs and calculates similarity scores.

    Adds two columns to ``data`` and returns it:

    * "Predicted SKU": the classifier's prediction, or "-" when the
      prediction shares nothing with the product name (similarity of 0).
    * "SKU Suggestion": a SKU built from the product name when the similarity
      is below 0.5, otherwise "-" (no action needed).
    """
    data['Predicted SKU'] = rf_classifier.predict(product_vectors)

    predicted_sku_vectors = vectorizer.transform(data['Predicted SKU'].astype(str))
    similarity_scores = _rowwise_cosine_similarity(product_vectors, predicted_sku_vectors)

    # Blank out predictions that have no overlap with the product name.
    # A boolean mask is positional, so this does not depend on the index labels.
    data.loc[similarity_scores == 0, 'Predicted SKU'] = "-"

    # Propose a new SKU (derived from the product name) for weak matches only
    data['SKU Suggestion'] = [
        transform_to_sku(name) if score < _SIMILARITY_THRESHOLD else '-'
        for name, score in zip(data['Product Name'], similarity_scores)
    ]

    return data


def display_results(data):
    """Displays the processed data and a download link."""
    st.subheader('3. Predicted Results')

    # Show a preview of the data with an option to view all
    total_rows = len(data)
    if total_rows > _MIN_PREVIEW_ROWS:
        num_rows = st.slider(
            "Select number of rows to view",
            _MIN_PREVIEW_ROWS,
            total_rows,
            # The default must not exceed the slider maximum for short files.
            min(_DEFAULT_PREVIEW_ROWS, total_rows),
        )
    else:
        # Too few rows for a meaningful slider range: show everything.
        num_rows = total_rows
    st.write(data.head(num_rows))

    st.subheader('4. Download Results')
    st.markdown(get_table_download_link(data), unsafe_allow_html=True)


def get_table_download_link(df):
    """Generates a download link for the DataFrame.

    Returns an HTML anchor whose ``href`` is a base64 data URI holding the
    DataFrame as CSV (without the index).
    """
    csv = df.to_csv(index=False)
    b64 = base64.b64encode(csv.encode()).decode()
    return (
        f'<a href="data:text/csv;base64,{b64}" '
        f'download="predicted_skus.csv">Download CSV File</a>'
    )


if __name__ == "__main__":
    main()