Spaces:

ardifarizky
/

amt_dsw_2023

Runtime error

amt_dsw_2023

File size: 5,794 Bytes

bba5e41

# Import necessary libraries
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import joblib
import base64
from sklearn.metrics.pairwise import cosine_similarity
import re
from PIL import Image

# Load the trained Random Forest model and TF-IDF vectorizer.
# These statements run at import time and use relative paths, so the files are
# resolved against the process's current working directory.
# NOTE(review): joblib.load unpickles the file contents; only ship artifacts
# that come from a trusted source.
rf_classifier = joblib.load('random_forest_model.pkl')
vectorizer = joblib.load('tfidf_vectorizer.pkl')
# Images rendered in the sidebar by display_sidebar().
image1 = Image.open('image1.PNG')
logo = Image.open('logo.png')

# CSS injected into the page to hide the elements matched by `#MainMenu` and `footer`.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
# The raw <style> tag is passed with unsafe_allow_html=True so it is emitted as HTML.
st.markdown(hide_streamlit_style, unsafe_allow_html=True) 

def main():
    """Render the page: title, sidebar, upload widget and, once a file is given, the pipeline."""
    st.title('Batch Product SKU Predictor')
    display_sidebar()

    # Step 1 of the UI: ask for the input file.
    st.subheader('1. File Upload')
    upload_prompt = "Choose a CSV or Excel file. Make sure the number of rows is less than 20,000."
    accepted_types = ['csv', 'xlsx']
    source_file = st.file_uploader(upload_prompt, type=accepted_types)

    # Nothing uploaded yet: show the hint and stop here.
    if not source_file:
        st.info("Please upload a CSV or Excel file to get started.")
        return

    st.success("File uploaded successfully!")
    st.subheader('2. Processing Data...')
    process_data(source_file)

def display_sidebar():
    """Fill the sidebar with the logo, an 'About' blurb and the usage instructions."""
    sidebar = st.sidebar

    # Branding and short description.
    sidebar.image(logo, width=250)
    sidebar.header('About')
    sidebar.text('This app predicts product SKUs based\non uploaded data.')

    # Numbered instructions; the example image sits between steps 2 and 3.
    sidebar.subheader('Instructions:')
    sidebar.text('1. Upload your data file.')
    sidebar.text('2. Make sure your column name is\n"Product Name".')
    sidebar.image(image1, 'example')
    for closing_step in ('3. Wait for processing.', '4. View and download the results.'):
        sidebar.text(closing_step)

def transform_to_sku(product_name):
    """Turn a product name into a hyphen-separated, upper-case SKU string.

    Non-string input yields the placeholder ``"UNKNOWN-SKU"``. For strings the
    steps are: drop ``.``, ``@`` and ``+``; strip parentheses but keep their
    contents; hyphenate digit/letter boundaries; join whitespace-separated
    words with hyphens; upper-case; collapse runs of hyphens.

    >>> transform_to_sku("Coca Cola 330ml")
    'COCA-COLA-330-ML'
    """
    # Guard clause: anything that is not a string gets the fixed placeholder.
    if not isinstance(product_name, str):
        return "UNKNOWN-SKU"

    cleaned = product_name
    for unwanted in ('.', '@', '+'):
        cleaned = cleaned.replace(unwanted, '')

    # Remove the parentheses themselves while keeping whatever they enclose.
    cleaned = re.sub(r'\((.*?)\)', r'\1', cleaned)

    # Hyphenate digit->letter boundaries first, then letter->digit boundaries.
    for boundary in (r'(\d+)([a-zA-Z])', r'([a-zA-Z])(\d+)'):
        cleaned = re.sub(boundary, r'\1-\2', cleaned)

    hyphenated = '-'.join(cleaned.split()).upper()
    # A literal "-" word in the name would otherwise leave "---" behind.
    return re.sub(r'-{2,}', '-', hyphenated)

def process_file_upload():
    """Show an upload widget and hand any uploaded file to ``process_data``."""
    candidate = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'])

    # No file yet: report that we are waiting and do nothing else.
    if not candidate:
        st.write("Awaiting file upload...")
        return

    st.write("File uploaded successfully. Processing...")
    process_data(candidate)

def process_data(uploaded_file):
    """Run the load -> vectorize -> predict -> display pipeline for one upload.

    A progress bar is advanced after each stage (25/50/75/100). Any exception
    raised by a stage is caught here and reported in the page instead of
    propagating.

    Args:
        uploaded_file: The uploaded file object, passed straight to ``load_data``.
    """
    progress_bar = st.progress(0)
    try:
        data = load_data(uploaded_file)
        progress_bar.progress(25)

        product_vectors = preprocess_data(data)
        progress_bar.progress(50)

        data = predict_and_score(data, product_vectors)
        progress_bar.progress(75)

        display_results(data)
        progress_bar.progress(100)
    except Exception as e:
        # UI boundary: surface the failure to the user rather than crash the page.
        # st.error is used because st.write has no `color` parameter, so passing
        # color='red' never rendered the message as an error.
        st.error(f"⚠️ An error occurred: {e}")

def load_data(uploaded_file):
    """Read the uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded_file: File-like object exposing a ``name`` attribute.

    Returns:
        The result of ``pd.read_csv`` when the name ends in ``.csv`` (in any
        letter case), otherwise the result of ``pd.read_excel``.
    """
    # Compare case-insensitively so a file named "DATA.CSV" is not handed to
    # the Excel reader.
    if uploaded_file.name.lower().endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return pd.read_excel(uploaded_file)

def preprocess_data(data):
    """Fill missing product names and vectorize the 'Product Name' column.

    Args:
        data: DataFrame with a ``'Product Name'`` column. The column is
            updated on ``data`` itself so later stages see the filled values.

    Returns:
        The result of ``vectorizer.transform`` on the filled column.

    Raises:
        KeyError: If ``data`` has no ``'Product Name'`` column.
    """
    # Assign the filled column back rather than calling fillna(inplace=True) on
    # the column selection: that chained in-place form acts on an intermediate
    # object (pandas 2.x warns about it, and under Copy-on-Write `data` is left
    # unchanged, so the missing values would reach the vectorizer).
    data['Product Name'] = data['Product Name'].fillna("")
    return vectorizer.transform(data['Product Name'])

def predict_and_score(data, product_vectors):
    """Predict a SKU per row and flag rows whose prediction looks unrelated.

    Adds two columns to ``data`` (which is mutated and also returned):

    * ``'Predicted SKU'`` - output of ``rf_classifier.predict``; replaced by
      ``"-"`` where the product name and its predicted SKU have a cosine
      similarity of exactly 0 in the vectorizer's feature space.
    * ``'SKU Suggestion'`` - ``transform_to_sku(product name)`` where that
      similarity is below 0.5, otherwise ``"-"``.

    Args:
        data: DataFrame with a ``'Product Name'`` column, one row per vector.
        product_vectors: Vectorized product names, row-aligned with ``data``.

    Returns:
        The same ``data`` object with the two columns added.
    """
    # Function-scope import: scikit-learn is already a dependency of this file
    # and only this function needs `normalize`.
    from sklearn.preprocessing import normalize

    data['Predicted SKU'] = rf_classifier.predict(product_vectors)
    predicted_sku_vectors = vectorizer.transform(data['Predicted SKU'].astype(str))

    # Only the similarity between row i and its own prediction is needed, so
    # compute it row by row (L2-normalize both sides, multiply element-wise,
    # sum per row) instead of materialising an n x n similarity matrix.
    left = normalize(product_vectors)
    right = normalize(predicted_sku_vectors)
    # Sparse matrices expose .multiply(); fall back to NumPy for dense input.
    paired = left.multiply(right) if hasattr(left, 'multiply') else np.multiply(left, right)
    similarity = np.asarray(paired.sum(axis=1)).ravel()

    # No shared features at all: treat the prediction as meaningless.
    # A boolean mask is positional, so this does not depend on the index labels.
    data.loc[similarity == 0, 'Predicted SKU'] = "-"

    # Weak matches get a SKU proposal derived from the product name; confident
    # matches need no action and are marked with "-".
    data['SKU Suggestion'] = [
        transform_to_sku(name) if score < 0.5 else '-'
        for name, score in zip(data['Product Name'], similarity)
    ]
    return data



def display_results(data):
    """Show a preview of the processed table followed by a CSV download link.

    Args:
        data: DataFrame to preview and export.
    """
    st.subheader('3. Predicted Results')

    # Preview size: a slider from 5 rows up to the whole table, defaulting to 10.
    total_rows = len(data)
    min_preview, default_preview = 5, 10
    if total_rows > min_preview:
        # Clamp the default so it never exceeds the slider's upper bound when
        # the table has fewer than 10 rows.
        num_rows = st.slider(
            "Select number of rows to view",
            min_preview,
            total_rows,
            min(default_preview, total_rows),
        )
    else:
        # With 5 rows or fewer there is no valid 5..N range; show everything.
        num_rows = total_rows
    st.write(data.head(num_rows))

    st.subheader('4. Download Results')
    # The link is raw HTML, hence unsafe_allow_html=True.
    st.markdown(get_table_download_link(data), unsafe_allow_html=True)
    

def get_table_download_link(df):
    """Return an HTML anchor that downloads ``df`` as ``predicted_data.csv``.

    The DataFrame is serialized to CSV without the index, base64-encoded and
    embedded in the anchor's ``href`` as a data URI.
    """
    csv_bytes = df.to_csv(index=False).encode()
    payload = base64.b64encode(csv_bytes).decode()
    return f'<a href="data:file/csv;base64,{payload}" download="predicted_data.csv">Download CSV File</a>'

# Build the page only when this file is executed as the main module, not when it is imported.
if __name__ == "__main__":
    main()