Spaces:

ardifarizky
/

amt_dsw_2023

Runtime error

App Files Files Community

amt_dsw_2023 / app.py

ardifarizky

Upload 7 files

bba5e41 8 months ago

raw history blame

No virus

5.79 kB

	# Import necessary libraries
	import streamlit as st
	import pandas as pd
	from sklearn.feature_extraction.text import TfidfVectorizer
	import numpy as np
	import joblib
	import base64
	from sklearn.metrics.pairwise import cosine_similarity
	import re
	from PIL import Image

	# Load the trained Random Forest model and TF-IDF vectorizer.
	# joblib.load unpickles the file, and unpickling can execute arbitrary code,
	# so these artifacts must only ever come from a trusted source.
	# NOTE(review): Streamlit re-executes the script on each interaction, so these
	# loads presumably repeat on every rerun — consider caching; confirm.
	rf_classifier = joblib.load('random_forest_model.pkl')
	vectorizer = joblib.load('tfidf_vectorizer.pkl')
	# Images shown in the sidebar by display_sidebar().
	image1 = Image.open('image1.PNG')
	logo = Image.open('logo.png')

	# CSS rules that hide the elements matched by #MainMenu and footer.
	hide_streamlit_style = """
	<style>
	#MainMenu {visibility: hidden;}
	footer {visibility: hidden;}
	</style>
	"""
	# unsafe_allow_html lets the raw <style> tag through instead of escaping it.
	st.markdown(hide_streamlit_style, unsafe_allow_html=True)

	def main():
	    """Render the page: title, sidebar, upload widget, then processing."""
	    st.title('Batch Product SKU Predictor')
	    display_sidebar()

	    st.subheader('1. File Upload')
	    upload_prompt = "Choose a CSV or Excel file. Make sure the number of rows is less than 20,000."
	    uploaded_file = st.file_uploader(upload_prompt, type=['csv', 'xlsx'])

	    # Guard clause: there is nothing to process until a file has been chosen.
	    if not uploaded_file:
	        st.info("Please upload a CSV or Excel file to get started.")
	        return

	    st.success("File uploaded successfully!")
	    st.subheader('2. Processing Data...')
	    process_data(uploaded_file)

	def display_sidebar():
	    """Fill the sidebar with the logo, an 'About' blurb and the usage steps."""
	    panel = st.sidebar

	    panel.image(logo, width=250)
	    panel.header('About')
	    panel.text('This app predicts product SKUs based\non uploaded data.')

	    panel.subheader('Instructions:')
	    panel.text('1. Upload your data file.')
	    panel.text('2. Make sure your column name is\n"Product Name".')
	    # The second positional argument of image() is the caption.
	    panel.image(image1, 'example')
	    for step in ('3. Wait for processing.', '4. View and download the results.'):
	        panel.text(step)
	# Function to transform product names into SKU names

	def transform_to_sku(product_name):
	    """Build an upper-case, hyphen-separated SKU string from a product name.

	    Steps: drop '.', '@' and '+'; unwrap parenthesised text; insert a hyphen
	    at every digit/letter boundary; join whitespace-separated words with
	    hyphens; upper-case; collapse hyphen runs and trim hyphens at both ends.

	    Args:
	        product_name: The raw product name. Anything that is not a ``str``
	            (for example ``None`` or a float NaN) is treated as missing.

	    Returns:
	        The derived SKU, or ``"UNKNOWN-SKU"`` when the input is missing or
	        nothing usable remains after cleaning (empty, blank, or made up of
	        removed characters and hyphens only).
	    """
	    if not isinstance(product_name, str):
	        return "UNKNOWN-SKU"

	    # Remove the unwanted characters in a single pass.
	    cleaned = product_name.translate(str.maketrans('', '', '.@+'))
	    # Keep the text inside parentheses, drop the parentheses themselves.
	    cleaned = re.sub(r'\((.*?)\)', r'\1', cleaned)
	    # Insert hyphens between numbers and letters that touch each other.
	    cleaned = re.sub(r'(\d+)([a-zA-Z])', r'\1-\2', cleaned)
	    cleaned = re.sub(r'([a-zA-Z])(\d+)', r'\1-\2', cleaned)

	    sku_name = '-'.join(cleaned.split()).upper()
	    # Collapse hyphen runs, then trim the ends so the SKU never starts or
	    # finishes with a separator.
	    sku_name = re.sub(r'-{2,}', '-', sku_name).strip('-')

	    # An empty result (blank name, or only punctuation/hyphens) is not a
	    # usable SKU, so fall back to the same placeholder as for missing names.
	    return sku_name or "UNKNOWN-SKU"

	def process_file_upload():
	    """Show an upload widget and process the file once one has been chosen."""
	    uploaded_file = st.file_uploader("Choose a CSV or Excel file", type=['csv', 'xlsx'])

	    # Early exit while no file is available yet.
	    if not uploaded_file:
	        st.write("Awaiting file upload...")
	        return

	    st.write("File uploaded successfully. Processing...")
	    process_data(uploaded_file)

	def process_data(uploaded_file):
	    """Run the load, vectorise, predict and display steps with a progress bar.

	    Args:
	        uploaded_file: The uploaded file object; it is handed to
	            ``load_data`` unchanged.

	    Any exception raised by a step is caught here and reported in the page
	    instead of propagating, so a bad file does not crash the app.
	    """
	    progress_bar = st.progress(0)
	    try:
	        data = load_data(uploaded_file)
	        progress_bar.progress(25)

	        product_vectors = preprocess_data(data)
	        progress_bar.progress(50)

	        data = predict_and_score(data, product_vectors)
	        progress_bar.progress(75)

	        display_results(data)
	        progress_bar.progress(100)
	    except Exception as e:  # UI boundary: surface the failure to the user
	        # st.error renders a red error box; st.write has no ``color`` option,
	        # so the previous keyword argument had no styling effect.
	        st.error(f"⚠️ An error occurred: {e}")

	def load_data(uploaded_file):
	    """Load the uploaded CSV or Excel file into a DataFrame.

	    Args:
	        uploaded_file: A file-like object with a ``name`` attribute holding
	            the original file name.

	    Returns:
	        A ``pandas.DataFrame`` read with ``read_csv`` when the name ends in
	        ``.csv`` (in any letter case), otherwise with ``read_excel``.
	    """
	    # Compare case-insensitively so names such as "DATA.CSV" are not sent to
	    # the Excel reader by mistake.
	    if uploaded_file.name.lower().endswith('.csv'):
	        return pd.read_csv(uploaded_file)
	    return pd.read_excel(uploaded_file)

	def preprocess_data(data):
	    """Clean the 'Product Name' column and return its TF-IDF vectors.

	    Missing names become empty strings and every value is converted to
	    ``str``, because the vectorizer is applied to the column as text. The
	    cleaned column is written back to ``data``.

	    Args:
	        data: DataFrame that must contain a 'Product Name' column.

	    Returns:
	        The result of ``vectorizer.transform`` on the cleaned column.

	    Raises:
	        KeyError: If ``data`` has no 'Product Name' column.
	    """
	    if 'Product Name' not in data.columns:
	        raise KeyError('The uploaded file has no "Product Name" column.')

	    # Assign the result back instead of calling fillna(inplace=True) on the
	    # selected column: the in-place form on a column selection is not
	    # guaranteed to update the parent frame (pandas Copy-on-Write).
	    data['Product Name'] = data['Product Name'].fillna("").astype(str)
	    return vectorizer.transform(data['Product Name'])

	def _rowwise_cosine(left, right):
	    """Return the cosine similarity of row i of *left* with row i of *right*.

	    Assumes both inputs are SciPy sparse matrices of identical shape (they
	    must support ``.multiply``) — confirm if the vectorizer is ever changed.
	    """
	    dot = np.asarray(left.multiply(right).sum(axis=1), dtype=float).ravel()
	    left_norm = np.sqrt(np.asarray(left.multiply(left).sum(axis=1), dtype=float).ravel())
	    right_norm = np.sqrt(np.asarray(right.multiply(right).sum(axis=1), dtype=float).ravel())
	    denom = left_norm * right_norm
	    # An all-zero row has no direction; report similarity 0 for it rather
	    # than dividing by zero.
	    return np.divide(dot, denom, out=np.zeros_like(dot), where=denom > 0)


	def predict_and_score(data, product_vectors):
	    """Predict a SKU per row and derive a suggestion from text similarity.

	    Adds or overwrites two columns on ``data``:

	    * 'Predicted SKU' - the classifier's prediction, replaced by "-" when
	      its vector has zero cosine similarity with the product-name vector.
	    * 'SKU Suggestion' - ``transform_to_sku(product name)`` when the
	      similarity is below 0.5, otherwise "-".

	    Args:
	        data: DataFrame with a 'Product Name' column.
	        product_vectors: Vectorised product names, one row per row of
	            ``data``.

	    Returns:
	        The same DataFrame, with the two columns set.
	    """
	    data['Predicted SKU'] = rf_classifier.predict(product_vectors)
	    predicted_sku_vectors = vectorizer.transform(data['Predicted SKU'].astype(str))

	    # Only the similarity of each row with its own prediction is needed, so
	    # compute it row by row instead of building the full n x n matrix.
	    similarity = _rowwise_cosine(product_vectors, predicted_sku_vectors)

	    # Blank out predictions that share nothing with the product name. The
	    # mask is positional, so this does not depend on the frame's index.
	    no_overlap = similarity == 0
	    data['Predicted SKU'] = data['Predicted SKU'].where(~no_overlap, "-")

	    # Propose a SKU built from the product name when the prediction is a
	    # weak match; "-" means no action is needed.
	    needs_new_sku = similarity < 0.5
	    data['SKU Suggestion'] = [
	        transform_to_sku(name) if flagged else '-'
	        for name, flagged in zip(data['Product Name'], needs_new_sku)
	    ]
	    return data



	def display_results(data):
	    """Show a preview of the processed data followed by a download link.

	    Args:
	        data: The processed DataFrame to preview and offer for download.
	    """
	    st.subheader('3. Predicted Results')

	    total_rows = len(data)
	    min_rows = 5
	    if total_rows > min_rows:
	        # Clamp the default to the available rows: a default above the
	        # slider maximum is rejected by Streamlit.
	        num_rows = st.slider(
	            "Select number of rows to view",
	            min_rows,
	            total_rows,
	            min(10, total_rows),
	        )
	    else:
	        # Too few rows for a valid slider range (min must be below max),
	        # so simply show everything.
	        num_rows = total_rows
	    st.write(data.head(num_rows))

	    st.subheader('4. Download Results')
	    st.markdown(get_table_download_link(data), unsafe_allow_html=True)


	def get_table_download_link(df):
	    """Return an HTML anchor that downloads *df* as 'predicted_data.csv'.

	    The frame is serialised to CSV without its index and embedded in the
	    link as a base64 data URI.
	    """
	    csv_bytes = df.to_csv(index=False).encode()
	    payload = base64.b64encode(csv_bytes).decode()
	    return f'<a href="data:file/csv;base64,{payload}" download="predicted_data.csv">Download CSV File</a>'

	if __name__ == "__main__":
	    # Build the page only when this file is executed as the main script.
	    main()