Spaces:

ardifarizky
/

amt_dsw_2023

Runtime error

App Files Files Community

ardifarizky committed on Nov 12, 2023

Commit

bba5e41

•

1 Parent(s): c98ff59

Upload 7 files

Browse files

Files changed (7) hide show

Dockerfile +7 -0
app.py +157 -0
image1.PNG +0 -0
logo.png +0 -0
random_forest_model.pkl +3 -0
requirements.txt +8 -0
tfidf_vectorizer.pkl +3 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,7 @@

+# Base image: official CPython 3.9.
+FROM python:3.9
+# Port the Streamlit server listens on (must match --server.port below).
+EXPOSE 8080
+# Install dependencies before copying the sources so this layer is reused
+# when only application code changes.
+ADD requirements.txt requirements.txt
+RUN pip install -r requirements.txt
+WORKDIR /app
+COPY . ./
+# The application script uploaded in this commit is app.py; the previous
+# entry point "st.py" is not among the uploaded files.
+ENTRYPOINT ["streamlit", "run", "app.py", "--server.port=8080", "--server.address=0.0.0.0"]

app.py ADDED Viewed

	@@ -0,0 +1,157 @@

+# Import necessary libraries
+import streamlit as st
+import pandas as pd
+from sklearn.feature_extraction.text import TfidfVectorizer
+import numpy as np
+import joblib
+import base64
+from sklearn.metrics.pairwise import cosine_similarity
+import re
+from PIL import Image
+# Load the trained Random Forest model and TF-IDF vectorizer
+rf_classifier = joblib.load('random_forest_model.pkl')
+vectorizer = joblib.load('tfidf_vectorizer.pkl')
+image1 = Image.open('image1.PNG')
+logo = Image.open('logo.png')
+hide_streamlit_style = """
+            <style>
+            #MainMenu {visibility: hidden;}
+            footer {visibility: hidden;}
+            </style>
+            """
+st.markdown(hide_streamlit_style, unsafe_allow_html=True)
+def main():
+    st.title('Batch Product SKU Predictor')
+    # Sidebar
+    display_sidebar()
+    # Main UI sections
+    st.subheader('1. File Upload')
+    uploaded_file = st.file_uploader("Choose a CSV or Excel file. Make sure the number of rows is less than 20,000.", type=['csv', 'xlsx'])
+    if uploaded_file:
+        st.success("File uploaded successfully!")
+        st.subheader('2. Processing Data...')
+        process_data(uploaded_file)
+    else:
+        st.info("Please upload a CSV or Excel file to get started.")
+def display_sidebar():
+    """Displays information on the sidebar."""
+    st.sidebar.image(logo, width=250)
+    st.sidebar.header('About')
+    st.sidebar.text('This app predicts product SKUs based\non uploaded data.')
+    st.sidebar.subheader('Instructions:')
+    st.sidebar.text('1. Upload your data file.')
+    st.sidebar.text('2. Make sure your column name is\n"Product Name".')
+    st.sidebar.image(image1, 'example')
+    st.sidebar.text('3. Wait for processing.')
+    st.sidebar.text('4. View and download the results.')
+    # Function to transform product names into SKU names
+def transform_to_sku(product_name):
+    if isinstance(product_name, str):
+        # Remove unwanted characters
+        product_name = product_name.replace('.', '').replace('@', '').replace('+', '')
+        # Remove parentheses
+        product_name = re.sub(r'\((.*?)\)', r'\1', product_name)
+        # Insert hyphens between numbers and letters if there is no space
+        product_name = re.sub(r'(\d+)([a-zA-Z])', r'\1-\2', product_name)
+        product_name = re.sub(r'([a-zA-Z])(\d+)', r'\1-\2', product_name)
+        # Split, join with hyphens, and convert to uppercase
+        sku_name = '-'.join(product_name.split()).upper()
+        # Collapse multiple hyphens into one
+        sku_name = re.sub(r'-{2,}', '-', sku_name)
+    else:
+        sku_name = "UNKNOWN-SKU"
+    return sku_name
+def process_file_upload():
+    """Handles the file upload and processing."""
+    uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'])
+    if uploaded_file:
+        st.write("File uploaded successfully. Processing...")
+        process_data(uploaded_file)
+    else:
+        st.write("Awaiting file upload...")
+def process_data(uploaded_file):
+    """Processes the uploaded file and displays the results."""
+    progress_bar = st.progress(0)
+    try:
+        data = load_data(uploaded_file)
+        progress_bar.progress(25)
+        product_vectors = preprocess_data(data)
+        progress_bar.progress(50)
+        data = predict_and_score(data, product_vectors)
+        progress_bar.progress(75)
+        display_results(data)
+        progress_bar.progress(100)
+    except Exception as e:
+        st.write(f"⚠️ An error occurred: {str(e)}", color='red')
+def load_data(uploaded_file):
+    """Loads the uploaded CSV or Excel file into a DataFrame."""
+    if uploaded_file.name.endswith('.csv'):
+        return pd.read_csv(uploaded_file)
+    else:
+        return pd.read_excel(uploaded_file)
+def preprocess_data(data):
+    """Preprocesses the data and returns product vectors."""
+    data['Product Name'].fillna("", inplace=True)
+    return vectorizer.transform(data['Product Name'])
+def predict_and_score(data, product_vectors):
+    """Predicts SKUs and calculates similarity scores."""
+    data['Predicted SKU'] = rf_classifier.predict(product_vectors)
+    predicted_sku_vectors = vectorizer.transform(data['Predicted SKU'].astype(str))
+    similarity_scores = cosine_similarity(product_vectors, predicted_sku_vectors)
+    # Update 'Predicted SKU' based on similarity score
+    for i in range(similarity_scores.shape[0]):
+        if similarity_scores[i][i] == 0:
+            data.at[i, 'Predicted SKU'] = "-"
+    # Create SKU suggestions based on similarity score
+    data['SKU Suggestion'] = [
+        "Propose New SKU" if similarity_scores[i][i] < 0.5 else "No Action Needed"
+        for i in range(similarity_scores.shape[0])
+    ]
+    # Apply the transformation function to the 'Product Name' column to create a 'Transformed SKU' column
+    data['SKU Suggestion'] = data.apply(
+        lambda row: '-' if row['SKU Suggestion'] == "No Action Needed" else transform_to_sku(row['Product Name']),
+        axis=1
+    )
+    return data
+def display_results(data):
+    """Displays the processed data and a download link."""
+    st.subheader('3. Predicted Results')
+    # Show a preview of the data with an option to view all
+    num_rows = st.slider("Select number of rows to view", 5, len(data), 10)
+    st.write(data.head(num_rows))
+    st.subheader('4. Download Results')
+    st.markdown(get_table_download_link(data), unsafe_allow_html=True)
+def get_table_download_link(df):
+    """Generates a download link for the DataFrame."""
+    csv = df.to_csv(index=False)
+    b64 = base64.b64encode(csv.encode()).decode()
+    href = f'<a href="data:file/csv;base64,{b64}" download="predicted_data.csv">Download CSV File</a>'
+    return href
+# Entry point: build the UI when this file is executed as a script.
+if __name__ == "__main__":
+    main()

image1.PNG ADDED Viewed

logo.png ADDED Viewed

random_forest_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4cb31a2892079651c48d307da5dfed3928f7e458e4ba9cd58c3fb310d9e7209d
+size 115786409

requirements.txt ADDED Viewed

	@@ -0,0 +1,8 @@

+streamlit
+pandas
+scikit-learn==1.3
+numpy
+joblib
+regex
+openpyxl
+Pillow

tfidf_vectorizer.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:729ec12ad98cde5efdf61750411ecd8477347dab6329700e44dfe02c21ea5c70
+size 434688