File size: 5,794 Bytes
bba5e41
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# Import necessary libraries
import streamlit as st
import pandas as pd
from sklearn.feature_extraction.text import TfidfVectorizer
import numpy as np
import joblib
import base64
from sklearn.metrics.pairwise import cosine_similarity
import re
from PIL import Image

# Load the persisted model artifacts and sidebar images once at import time.
# All paths are relative, so they resolve against the current working directory.
# NOTE(review): joblib.load deserializes pickled objects — these files must
# come from a trusted source.
rf_classifier = joblib.load('random_forest_model.pkl')  # used by predict_and_score
vectorizer = joblib.load('tfidf_vectorizer.pkl')  # used by preprocess_data and predict_and_score
image1 = Image.open('image1.PNG')  # sidebar image shown with the caption 'example'
logo = Image.open('logo.png')  # sidebar image shown first, at width 250

# CSS snippet that sets `visibility: hidden` on the elements matched by the
# `#MainMenu` and `footer` selectors; injected as raw HTML below.
hide_streamlit_style = """
            <style>
            #MainMenu {visibility: hidden;}
            footer {visibility: hidden;}
            </style>
            """
st.markdown(hide_streamlit_style, unsafe_allow_html=True) 

def main():
    """Render the page: title, sidebar, upload widget, then the results.

    When no file has been uploaded yet, an informational hint is shown and
    nothing else happens. Otherwise the upload is handed to ``process_data``.
    """
    st.title('Batch Product SKU Predictor')

    # Sidebar
    display_sidebar()

    # Main UI sections
    st.subheader('1. File Upload')
    upload_prompt = (
        "Choose a CSV or Excel file. "
        "Make sure the number of rows is less than 20,000."
    )
    upload = st.file_uploader(upload_prompt, type=['csv', 'xlsx'])

    # Guard clause: nothing to process until the user provides a file.
    if not upload:
        st.info("Please upload a CSV or Excel file to get started.")
        return

    st.success("File uploaded successfully!")
    st.subheader('2. Processing Data...')
    process_data(upload)

def display_sidebar():
    """Populate the sidebar with the logo, an 'About' blurb and usage steps."""
    panel = st.sidebar

    panel.image(logo, width=250)
    panel.header('About')
    panel.text('This app predicts product SKUs based\non uploaded data.')

    panel.subheader('Instructions:')
    panel.text('1. Upload your data file.')
    panel.text('2. Make sure your column name is\n"Product Name".')
    # Example image illustrating step 2; the second argument is its caption.
    panel.image(image1, 'example')
    panel.text('3. Wait for processing.')
    panel.text('4. View and download the results.')
# transform_to_sku (below) converts product names into SKU names.

def transform_to_sku(product_name):
    """Derive a hyphen-separated, upper-case SKU name from a product name.

    Steps applied to string input:
      1. Drop the characters ``.``, ``@``, ``+`` and every parenthesis
         (including nested or unmatched ones), keeping the text inside.
      2. Insert a hyphen wherever a digit run touches a letter, in either
         order (``2kg`` -> ``2-kg``, ``abc123`` -> ``abc-123``).
      3. Join whitespace-separated tokens with hyphens and upper-case.
      4. Collapse runs of hyphens into a single hyphen.

    Args:
        product_name: The raw product name. Any non-string value (for
            example ``None`` or a NaN float) is treated as missing.

    Returns:
        The SKU name, or ``"UNKNOWN-SKU"`` when the input is not a string
        or nothing is left after cleaning (empty / whitespace-only /
        punctuation-only names).
    """
    if not isinstance(product_name, str):
        return "UNKNOWN-SKU"

    # Strip unwanted characters in one pass. Removing the parenthesis
    # characters directly (rather than matching balanced pairs) also handles
    # nested and unmatched parentheses.
    cleaned = product_name.translate(str.maketrans('', '', '.@+()'))

    # Insert hyphens between numbers and letters when there is no space.
    cleaned = re.sub(r'(\d+)([a-zA-Z])', r'\1-\2', cleaned)
    cleaned = re.sub(r'([a-zA-Z])(\d+)', r'\1-\2', cleaned)

    # Split on whitespace, join with hyphens, and convert to upper case.
    sku_name = '-'.join(cleaned.split()).upper()

    # Collapse multiple hyphens into one.
    sku_name = re.sub(r'-{2,}', '-', sku_name)

    # An empty result means there was no usable name; use the same fallback
    # as for non-string input instead of returning an empty SKU.
    return sku_name or "UNKNOWN-SKU"

def process_file_upload():
    """Show an upload widget and process the file once one is provided."""
    upload = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'])

    # Guard clause: keep waiting until the user has picked a file.
    if not upload:
        st.write("Awaiting file upload...")
        return

    st.write("File uploaded successfully. Processing...")
    process_data(upload)

def process_data(uploaded_file):
    """Run the full pipeline on an uploaded file and render the results.

    The pipeline is load -> vectorize -> predict/score -> display, with a
    progress bar advanced by 25% after each stage.

    Args:
        uploaded_file: The file object returned by the upload widget; it is
            passed unchanged to ``load_data``.

    Any exception raised by a stage is caught here, because this is the
    top-level UI boundary, and reported to the user as an error message
    instead of a traceback.
    """
    progress_bar = st.progress(0)
    try:
        data = load_data(uploaded_file)
        progress_bar.progress(25)

        product_vectors = preprocess_data(data)
        progress_bar.progress(50)

        data = predict_and_score(data, product_vectors)
        progress_bar.progress(75)

        display_results(data)
        progress_bar.progress(100)
    except Exception as e:
        # st.error renders a highlighted error box. The previous
        # st.write(..., color='red') passed a keyword st.write does not
        # support, which could make the handler itself fail.
        st.error(f"⚠️ An error occurred: {e}")

def load_data(uploaded_file):
    """Load the uploaded CSV or Excel file into a DataFrame.

    Args:
        uploaded_file: A file-like object with a ``name`` attribute holding
            the original file name.

    Returns:
        A ``pandas.DataFrame`` read with ``read_csv`` when the name ends in
        ``.csv`` (compared case-insensitively, so ``DATA.CSV`` is accepted),
        otherwise with ``read_excel``.
    """
    # Compare on the lower-cased name so upper/mixed-case extensions are not
    # misrouted to the Excel reader.
    if uploaded_file.name.lower().endswith('.csv'):
        return pd.read_csv(uploaded_file)
    return pd.read_excel(uploaded_file)

def preprocess_data(data):
    """Fill missing product names and vectorize the 'Product Name' column.

    Args:
        data: DataFrame containing a 'Product Name' column. The column is
            updated in the passed frame so that missing values become empty
            strings.

    Returns:
        The result of ``vectorizer.transform`` on the filled column.

    Raises:
        KeyError: If ``data`` has no 'Product Name' column.
    """
    # Assign the filled column back explicitly. A chained
    # `data[col].fillna(..., inplace=True)` acts on a temporary object and
    # does not update `data` under pandas Copy-on-Write.
    data['Product Name'] = data['Product Name'].fillna("")
    return vectorizer.transform(data['Product Name'])

def predict_and_score(data, product_vectors):
    """Predict a SKU per row and derive a suggestion from its similarity.

    For every row, the product name vector is compared with the vector of
    its own predicted SKU using cosine similarity:

    * similarity == 0  -> 'Predicted SKU' is replaced by "-".
    * similarity < 0.5 -> 'SKU Suggestion' is a new SKU built from the
      product name by ``transform_to_sku``.
    * otherwise        -> 'SKU Suggestion' is "-" (no action needed).

    Args:
        data: DataFrame with a 'Product Name' column; it is modified in
            place by adding 'Predicted SKU' and 'SKU Suggestion'.
        product_vectors: Vectorized product names, one row per row of
            ``data`` (the output of ``preprocess_data``).

    Returns:
        The same ``data`` frame with the two columns added.
    """
    # Local import keeps this block self-contained; sklearn is already a
    # dependency of this file.
    from sklearn.preprocessing import normalize

    data['Predicted SKU'] = rf_classifier.predict(product_vectors)
    predicted_sku_vectors = vectorizer.transform(data['Predicted SKU'].astype(str))

    # Only each row's similarity with its OWN prediction is needed, so
    # compute the row-wise dot product of the L2-normalized vectors instead
    # of materializing the full n x n cosine-similarity matrix. All-zero
    # rows stay zero after normalization, giving a similarity of exactly 0.
    unit_products = normalize(product_vectors)
    unit_predictions = normalize(predicted_sku_vectors)
    row_products = unit_predictions.multiply(unit_products).sum(axis=1)
    similarity = np.asarray(row_products).ravel()

    # Boolean masks are positional, so this works for any DataFrame index,
    # not just the default 0..n-1 labels.
    no_overlap = similarity == 0
    propose_new = similarity < 0.5

    # A prediction sharing nothing with the product name is not shown.
    data.loc[no_overlap, 'Predicted SKU'] = "-"

    # Suggest a new SKU derived from the product name for low-similarity
    # rows; "-" means no action is needed.
    data['SKU Suggestion'] = [
        transform_to_sku(name) if propose else '-'
        for name, propose in zip(data['Product Name'], propose_new)
    ]
    return data



def display_results(data):
    """Display a preview of the processed data and a CSV download link.

    Args:
        data: The processed DataFrame to preview and offer for download.

    A slider lets the user choose how many rows to preview (minimum 5,
    default 10). When the data has 5 rows or fewer, no slider is shown and
    all rows are displayed, since a slider needs its minimum to be below
    its maximum.
    """
    st.subheader('3. Predicted Results')

    min_preview = 5
    default_preview = 10
    total_rows = len(data)

    # Show a preview of the data with an option to view all.
    if total_rows > min_preview:
        num_rows = st.slider(
            "Select number of rows to view",
            min_preview,
            total_rows,
            # The default must not exceed the maximum for small results.
            min(default_preview, total_rows),
        )
    else:
        num_rows = total_rows
    st.write(data.head(num_rows))

    st.subheader('4. Download Results')
    st.markdown(get_table_download_link(data), unsafe_allow_html=True)
    

def get_table_download_link(df):
    """Return an HTML anchor that downloads *df* as ``predicted_data.csv``.

    The frame is serialized to CSV without the index, base64-encoded and
    embedded in the link's ``href`` as a data URI.
    """
    csv_bytes = df.to_csv(index=False).encode()
    payload = base64.b64encode(csv_bytes).decode()
    return f'<a href="data:file/csv;base64,{payload}" download="predicted_data.csv">Download CSV File</a>'

# Build the page only when this file is run as the main script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()