# Hugging Face Space (status at capture time: Sleeping)
import os
import re
import time
from itertools import product

import numpy as np
import pandas as pd
import streamlit as st
from dotenv import load_dotenv
from huggingface_hub import login
from paddleocr import PaddleOCR, draw_ocr
from PIL import Image, ImageOps, ImageDraw, ImageFont
# Read HF_TOKEN from the process environment, optionally populated from a
# local .env file.
load_dotenv()  # Load .env file
huggingface_token = os.getenv("HF_TOKEN")
if huggingface_token:
    login(huggingface_token)
else:
    # Calling login() without a token is not useful in a headless app, so the
    # call is skipped and the missing configuration is surfaced in the UI.
    st.warning(
        "HF_TOKEN is not set; requests to the Hugging Face Inference API may be rejected."
    )

##########################LLAMA3BI################################
from huggingface_hub import InferenceClient

# Client used for the chat-completion calls that turn OCR text into a listing.
client = InferenceClient(api_key=huggingface_token)

# Base conversation: a single system prompt describing the listing format.
# The user turn (OCR text and/or caption) is added per request.
messages = [
    {"role": "system", "content": """Your task is to get the product details out of the text given.
The text given will be raw text from OCR of social media images of products,
and the goal is to get product details and description so that it can be used for amazon product listing.
TRY TO KEEP THE LISTING IN FOLLOWING FORMAT.
📦 [Product Name]
💰 Price: $XX.XX
✨ Key Features:
• [Main Feature 1]
• [Main Feature 2]
• [Main Feature 3]
📸 [Product Image]
🏷 Available Now on Amazon
✈️ Prime Shipping Available
🛍 Shop Now: [Link]
🔍 Search: [Main Keywords]
[#RelevantHashtags] """},
]
# Function to get Instagram post details | |
import instaloader | |
from io import BytesIO | |
import requests | |
# Matches the shortcode segment of post, reel and IGTV URLs, with or without
# a trailing slash or query string.
_SHORTCODE_PATTERN = re.compile(r"/(?:p|reels?|tv)/([A-Za-z0-9_-]+)")


def _extract_shortcode(post_url):
    """Return the shortcode embedded in an Instagram post URL.

    Args:
        post_url (str): URL such as ``https://www.instagram.com/p/<code>/``.

    Returns:
        str: The shortcode segment following ``/p/``, ``/reel/``, ``/reels/``
        or ``/tv/``.

    Raises:
        ValueError: If no such segment is present in ``post_url``.
    """
    match = _SHORTCODE_PATTERN.search(post_url or "")
    if match is None:
        raise ValueError(f"Could not find an Instagram shortcode in URL: {post_url!r}")
    return match.group(1)


def get_instagram_post_details(post_url):
    """Fetch the caption and image of an Instagram post.

    Args:
        post_url (str): URL of the Instagram post.

    Returns:
        tuple: ``(caption, image_array)`` on success, where ``image_array`` is
        an RGB NumPy array of the post image. On any failure the function
        returns ``(error_message, None)`` instead of raising, so callers must
        check the second element before using the first as a caption.
    """
    try:
        # Validate the URL before doing any network work.
        shortcode = _extract_shortcode(post_url)

        loader = instaloader.Instaloader()
        post = instaloader.Post.from_shortcode(loader.context, shortcode)
        caption = post.caption

        # A timeout keeps a stalled download from hanging the page forever.
        response = requests.get(post.url, timeout=30)
        response.raise_for_status()  # Raise an exception for failed requests

        with Image.open(BytesIO(response.content)) as img:
            # Force three channels so downstream OCR always sees RGB data.
            img_array = np.array(img.convert("RGB"))
        return caption, img_array
    except Exception as e:
        # UI boundary: report the failure as text rather than crashing the app.
        return str(e), None
# Initialize PaddleOCR model
@st.cache_resource
def _load_ocr_model():
    """Build the PaddleOCR engine once and reuse it across Streamlit reruns.

    Streamlit re-executes the script on every interaction; caching avoids
    reconstructing the OCR engine each time.
    NOTE(review): the cached instance is shared between sessions; confirm that
    concurrent ``ocr.ocr`` calls are acceptable for this deployment.
    """
    return PaddleOCR(use_angle_cls=True, lang='en')


ocr = _load_ocr_model()

# Team details
team_members = [
    {"name": "Aman Deep", "image": "aman.jpg"},  # Replace with actual paths to images
    {"name": "Nandini", "image": "nandini.jpg"},
    {"name": "Abhay Sharma", "image": "abhay.jpg"},
    {"name": "Ratan Prakash Mishra", "image": "anandimg.jpg"},
]
# Function to preprocess the images for the model
def preprocess_image(image, target_size=(128, 128)):
    """
    Preprocess the input image for model prediction.

    Args:
        image (PIL.Image): Input image in PIL format (any mode).
        target_size (tuple): ``(width, height)`` to resize to. Defaults to
            ``(128, 128)``.

    Returns:
        np.ndarray: Float array of shape ``(1, height, width, 3)`` with values
        in ``[0, 1]``, or ``None`` if the image could not be processed.
    """
    try:
        # Convert first so grayscale, palette, RGBA and CMYK inputs all end up
        # with exactly three channels.
        rgb_image = image.convert("RGB")
        # LANCZOS filter for high-quality resizing
        resized = rgb_image.resize(target_size, Image.LANCZOS)
        # Normalize pixel values to [0, 1] range
        img_array = np.array(resized) / 255.0
        # Add batch dimension
        return np.expand_dims(img_array, axis=0)
    except Exception as e:
        print(f"Error processing image: {e}")
        return None  # Return None if there's an error
# Function to display images in a matrix format
def display_images_in_grid(images, max_images_per_row=4):
    """Render images in rows of Streamlit columns.

    Args:
        images: Iterable of images accepted by ``st.image``.
        max_images_per_row (int): Maximum number of images per row; must be
            at least 1.

    Raises:
        ValueError: If ``max_images_per_row`` is less than 1.
    """
    if max_images_per_row < 1:
        raise ValueError("max_images_per_row must be at least 1")

    # Materialize once so generators and other one-shot iterables work too.
    images = list(images)
    for start in range(0, len(images), max_images_per_row):
        row = images[start:start + max_images_per_row]
        # Only create as many columns as the (possibly shorter) last row needs.
        for column, img in zip(st.columns(len(row)), row):
            with column:
                st.image(img, use_column_width=True)
# Function to display team members in a grid
def display_team_members(members, max_members_per_row=4):
    """Render each team member's picture with their name underneath.

    Args:
        members: Iterable of dicts with an ``"image"`` entry (path passed to
            ``Image.open``) and a ``"name"`` entry.
        max_members_per_row (int): Maximum number of members per row; must be
            at least 1.

    Raises:
        ValueError: If ``max_members_per_row`` is less than 1.
    """
    if max_members_per_row < 1:
        raise ValueError("max_members_per_row must be at least 1")

    members = list(members)
    for start in range(0, len(members), max_members_per_row):
        row = members[start:start + max_members_per_row]
        for column, member in zip(st.columns(len(row)), row):
            with column:
                # Close the file handle as soon as the picture is rendered.
                with Image.open(member["image"]) as img:
                    st.image(img, use_column_width=True)
                st.write(member["name"])  # Display the name below the image
# Function to simulate loading process with a progress bar
def simulate_progress(steps=100, delay=0.02):
    """Animate a Streamlit progress bar from 0 to 100 percent.

    Args:
        steps (int): Number of increments; must be at least 1. Defaults to 100.
        delay (float): Seconds to sleep before each increment. Defaults to
            0.02.

    Raises:
        ValueError: If ``steps`` is less than 1.
    """
    if steps < 1:
        raise ValueError("steps must be at least 1")

    progress_bar = st.progress(0)
    for step in range(1, steps + 1):
        time.sleep(delay)
        # Scale to the 0-100 range; with the default 100 steps this is 1..100.
        progress_bar.progress(step * 100 // steps)
# ---------------------------------------------------------------------------
# Page script: sidebar navigation plus one branch per page, then the footer.
# ---------------------------------------------------------------------------

# Chat model used to turn OCR text into a product listing.
LLM_MODEL = "meta-llama/Llama-3.2-3B-Instruct"

PROJECT_NAVIGATION_MD = """
## Navigation
- [Project Overview](#project-overview)
- [Proposal Round](#proposal-round)
- [Problem Statement](#problem-statement)
- [Proposed Solution](#proposed-solution)
"""

# Blank lines around headings and `---` are required: a `---` directly under a
# paragraph would render that paragraph as a heading instead of drawing a rule.
PROJECT_OVERVIEW_MD = """
### Problem Statement

_Develop a system that automates Amazon product listings from social media content, extracting and organizing details from posts to generate accurate, engaging, and optimized listings._

---

### Solution Overview

Our system simplifies the listing process by analyzing social media content, using OCR, image recognition, LLMs, and internet data to create professional Amazon listings.

---

### Task Breakdown

#### Task 1: OCR for Image and Label Details

**Objective:** Extract core product details from images, labels, and packaging found in social media posts.

- **Tools:** PaddleOCR, LLMs.
- **Approach:**
  - Use PaddleOCR to scan images for text, identifying product names, brands, and key features.
  - Apply LLMs to refine extracted data, categorize key information (product name, type, features), and enhance product descriptions.
  - Integrate internet sources to cross-verify product details, retrieve additional information, and collect metadata like the brand background or product specs.

---

#### Additional Task: Image Recognition & Object Counting

**Objective:** Quantify objects within social media images for batch products or multi-item listings.

- **Tools:** YOLOv8.
- **Approach:**
  - Train YOLOv8 on a relevant dataset to recognize specific product types or packaging layouts.
  - Use object detection counts to provide quantitative data (e.g., "3-item bundle"), enhancing accuracy in listings.

---

#### Task 2: Data Validation & Structuring

**Objective:** Organize and validate extracted information, ensuring it’s formatted to meet Amazon’s listing requirements.

- **Tools:** Regex, LLMs.
- **Approach:**
  - Format and validate extracted details into Amazon-compliant structures (titles, descriptions, bullet points).
  - Use regex and parser tools for accuracy checks.
  - Leverage LLMs to create compelling descriptions and marketing brochures.
  - Search online for supplementary media (images/videos) to enrich the listing.

---

#### Task 3: Amazon API Integration

**Objective:** Connect with Amazon’s API to publish fully formed product listings directly.

- **Tools:** Amazon MWS or Selling Partner API.
- **Approach:**
  - Send structured listing data (text, media, product details) to Amazon’s API endpoints.
  - Handle feedback for submission errors and make necessary adjustments.
  - Develop a UI/dashboard for users to preview and edit listings before publishing.

---

### Future Enhancements

- **Model Improvement:** Further refine OCR and parsing accuracy.
- **Dashboard Development:** Enable users to preview and customize listings.
- **Multi-Market Compatibility:** Expand support to other e-commerce platforms.

This approach automates listing creation directly from social media content, helping sellers quickly launch optimized Amazon product pages.
"""

FOOTER_HTML = """
<style>
@keyframes fade-in {
from { opacity: 0; }
to { opacity: 1;}
}
.footer {
text-align: center;
font-size: 1.1em;
animation: fade-in 2s;
padding-top: 2rem;
}
</style>
<div class="footer">
<p>© 2024 Amazon Smbhav Challenge. All rights reserved.</p>
</div>
"""


def _ocr_text(ocr_result):
    """Join the recognized strings of a PaddleOCR result with single spaces.

    Each entry is indexed as ``entry[1][0]`` for its text. Empty or ``None``
    pages (no text detected) are skipped instead of raising ``TypeError``.
    """
    if not ocr_result:
        return ""
    return " ".join(entry[1][0] for page in ocr_result if page for entry in page)


def _generate_listing(user_content):
    """Ask the chat model for a product listing and return its text.

    A fresh conversation is built for every call so that text from one image
    or post never leaks into the prompt of the next one.
    """
    conversation = [*messages, {"role": "user", "content": user_content}]
    completion = client.chat.completions.create(
        model=LLM_MODEL,
        messages=conversation,
        max_tokens=500,
    )
    return completion.choices[0].message.content


# Title and description
st.title("Amazon Smbhav")

# Team Details with links
st.sidebar.title("Amazon Smbhav")
st.sidebar.write("DELHI TECHNOLOGICAL UNIVERSITY")

# Navbar with task tabs
st.sidebar.title("Navigation")
st.sidebar.write("Team Name: sadhya")
app_mode = st.sidebar.selectbox("Choose the task", ["Welcome", "Project Details", "Task 1", "Team Details"])

if app_mode == "Welcome":
    st.write("# Welcome to Amazon Smbhav! 🎉")

    # Read the local video inside a context manager so the handle is closed.
    with open('Finalist.mp4', 'rb') as video_file:  # Replace with the path to your video file
        video_bytes = video_file.read()
    st.video(video_bytes)

    # Add a welcome image
    welcome_image = Image.open("grid_banner.jpg")  # Replace with the path to your welcome image
    st.image(welcome_image, use_column_width=True)

elif app_mode == "Project Details":
    st.markdown(PROJECT_NAVIGATION_MD)
    st.write("## Project Overview:")
    st.write(PROJECT_OVERVIEW_MD)

elif app_mode == "Team Details":
    st.write("## Meet Our Team:")
    display_team_members(team_members)
    st.write("Delhi Technological University")

elif app_mode == "Task 1":
    st.write("## Task 1: 🖼️ OCR to Extract Details 📄")
    st.write("Using OCR to extract details from product packaging material, including brand name and pack size.")

    # --- Instagram post flow ------------------------------------------------
    st.title("Instagram Post Details Extractor")
    post_url = st.text_input("Enter Instagram Post URL:")
    if post_url:
        caption, image_array = get_instagram_post_details(post_url)
        # On failure the helper returns (error_text, None), so the image is
        # the only reliable success indicator.
        if image_array is not None:
            st.subheader("Caption:")
            st.write(caption)
            st.subheader("Image:")
            st.image(image_array, use_column_width=True)

            # Perform OCR on the image
            text = _ocr_text(ocr.ocr(image_array, cls=True))
            message = f"image ocr: {text} Caption: {caption}"
            st.write(message)

            # Send OCR text and caption to the LLM to get the listing content.
            simulate_progress()
            st.markdown(_generate_listing(message))
        else:
            st.error("Failed to retrieve the post details. Please check the URL. ////////")
            if caption:
                st.write(f"Reason: {caption}")

    # --- Uploaded images flow -----------------------------------------------
    uploaded_files = st.file_uploader(
        "Upload images of products",
        type=["jpeg", "png", "jpg"],
        accept_multiple_files=True,
    )
    if uploaded_files:
        st.write("### Uploaded Images in Circular Format:")
        preview_images = [Image.open(uploaded_file) for uploaded_file in uploaded_files]
        display_images_in_grid(preview_images, max_images_per_row=4)

        if st.button("Start Analysis"):
            simulate_progress()
            for uploaded_file, image in zip(uploaded_files, preview_images):
                # Force RGB so palette/alpha PNGs reach the OCR engine as
                # ordinary three-channel pixel data.
                img_array = np.array(image.convert("RGB"))

                st.write(f"Extracting details from {uploaded_file.name}...")
                text = _ocr_text(ocr.ocr(img_array, cls=True))
                st.write(f"OCR Result: {text}")

                # Send OCR text to the LLM to get the listing content.
                simulate_progress()
                st.markdown(_generate_listing(text))
                st.markdown("---")
    else:
        st.write("Please upload images to extract product details.")

# Footer with animation
st.markdown(FOOTER_HTML, unsafe_allow_html=True)