import json
import os
import re
from html import unescape

import gradio as gr
import requests
import torch
from transformers import AutoTokenizer, AutoModelForSequenceClassification
def extract_text_from_html(html):
    """Return the plain text of an HTML fragment (no BeautifulSoup needed).

    Tags are replaced by a space, character references (``&amp;``,
    ``&nbsp;``, ``&#8211;`` ...) are decoded, and runs of whitespace are
    collapsed to a single space.

    Args:
        html: HTML markup as a string. ``None`` or an empty string yields "".

    Returns:
        The stripped plain-text content.
    """
    if not html:
        return ""

    # Strip tags before decoding so that escaped markup such as "&lt;b&gt;"
    # survives as literal text instead of being removed as a tag.
    text = re.sub(r'<[^>]+>', ' ', html)

    # Decode every named/numeric character reference, not just a fixed few.
    text = unescape(text)

    # For str patterns \s also matches the non-breaking space that
    # "&nbsp;" decodes to, so it is normalised here as well.
    text = re.sub(r'\s+', ' ', text)

    return text.strip()
# Built-in sample deals, shaped like the posts this file reads from the
# DealsFinders API (``title``/``content``/``excerpt`` each wrap their HTML in
# a ``rendered`` key). Returned by fetch_deals_data() when sample data is
# requested or the API cannot be reached.
SAMPLE_DEALS = [
    {
        "id": 1,
        "title": {
            "rendered": "Apple AirPods Pro (2nd Generation) - 20% Off"
        },
        "link": "https://www.example.com/deals/airpods-pro",
        "date": "2025-02-25T10:00:00",
        "content": {
            "rendered": "<p>Get the latest Apple AirPods Pro (2nd Generation) for 20% off the regular price. These wireless earbuds feature active noise cancellation, transparency mode, and spatial audio with dynamic head tracking.</p><p>Regular price: $249.99</p><p>Deal price: $199.99</p><p>You save: $50.00</p>"
        },
        "excerpt": {
            "rendered": "<p>Apple AirPods Pro (2nd Generation) with active noise cancellation and transparency mode. Now 20% off - only $199.99!</p>"
        },
    },
    {
        "id": 2,
        "title": {
            "rendered": "Samsung 65\" QLED 4K Smart TV - $300 Off"
        },
        "link": "https://www.example.com/deals/samsung-qled-tv",
        "date": "2025-02-26T09:30:00",
        "content": {
            "rendered": "<p>Upgrade your home entertainment with this Samsung 65\" QLED 4K Smart TV. Features Quantum HDR, Motion Xcelerator Turbo+, and Object Tracking Sound for an immersive viewing experience.</p><p>Regular price: $1,299.99</p><p>Deal price: $999.99</p><p>You save: $300.00</p>"
        },
        "excerpt": {
            "rendered": "<p>Samsung 65\" QLED 4K Smart TV with Quantum HDR and Object Tracking Sound. Save $300 - now only $999.99!</p>"
        },
    },
    {
        "id": 3,
        "title": {
            "rendered": "Sony WH-1000XM5 Wireless Headphones - 25% Off"
        },
        "link": "https://www.example.com/deals/sony-wh1000xm5",
        "date": "2025-02-26T14:15:00",
        "content": {
            "rendered": "<p>Experience industry-leading noise cancellation with the Sony WH-1000XM5 wireless headphones. Features 30-hour battery life, quick charging, and exceptional sound quality with the new Integrated Processor V1.</p><p>Regular price: $399.99</p><p>Deal price: $299.99</p><p>You save: $100.00</p>"
        },
        "excerpt": {
            "rendered": "<p>Sony WH-1000XM5 wireless headphones with industry-leading noise cancellation and 30-hour battery life. Now 25% off at $299.99!</p>"
        },
    },
    {
        "id": 4,
        "title": {
            "rendered": "Bose QuietComfort Ultra Headphones - 20% Off"
        },
        "link": "https://www.example.com/deals/bose-quietcomfort-ultra",
        "date": "2025-02-25T15:30:00",
        "content": {
            "rendered": "<p>Experience the ultimate in noise cancellation with Bose QuietComfort Ultra headphones. Features spatial audio, custom EQ, and up to 24 hours of battery life.</p><p>Regular price: $429.99</p><p>Deal price: $343.99</p><p>You save: $86.00</p>"
        },
        "excerpt": {
            "rendered": "<p>Bose QuietComfort Ultra headphones with advanced noise cancellation and spatial audio. Now 20% off at $343.99!</p>"
        },
    },
    {
        "id": 5,
        "title": {
            "rendered": "Beats Studio Pro Wireless Headphones - 40% Off"
        },
        "link": "https://www.example.com/deals/beats-studio-pro",
        "date": "2025-02-26T16:30:00",
        "content": {
            "rendered": "<p>The Beats Studio Pro wireless headphones deliver premium sound with active noise cancellation, transparency mode, and up to 40 hours of battery life.</p><p>Regular price: $349.99</p><p>Deal price: $209.99</p><p>You save: $140.00</p>"
        },
        "excerpt": {
            "rendered": "<p>Beats Studio Pro wireless headphones with active noise cancellation and 40-hour battery life. Now 40% off at $209.99!</p>"
        },
    },
    {
        "id": 6,
        "title": {
            "rendered": "Dyson V12 Detect Slim Cordless Vacuum - $150 Off"
        },
        "link": "https://www.example.com/deals/dyson-v12",
        "date": "2025-02-27T08:45:00",
        "content": {
            "rendered": "<p>The Dyson V12 Detect Slim cordless vacuum features a laser that reveals microscopic dust, an LCD screen that displays particle counts, and powerful suction for deep cleaning.</p><p>Regular price: $649.99</p><p>Deal price: $499.99</p><p>You save: $150.00</p>"
        },
        "excerpt": {
            "rendered": "<p>Dyson V12 Detect Slim cordless vacuum with laser dust detection and powerful suction. Save $150 - now only $499.99!</p>"
        },
    },
    {
        "id": 7,
        "title": {
            "rendered": "Nintendo Switch OLED Model - Bundle Deal"
        },
        "link": "https://www.example.com/deals/nintendo-switch-oled",
        "date": "2025-02-27T11:20:00",
        "content": {
            "rendered": "<p>Get the Nintendo Switch OLED Model with a vibrant 7-inch OLED screen, plus two games and a carrying case. The perfect gaming package for home or on-the-go play.</p><p>Regular price: $439.99</p><p>Deal price: $379.99</p><p>You save: $60.00</p>"
        },
        "excerpt": {
            "rendered": "<p>Nintendo Switch OLED Model bundle with two games and carrying case. Special bundle price of $379.99!</p>"
        },
    },
    {
        "id": 8,
        "title": {
            "rendered": "MacBook Air M3 - $200 Off"
        },
        "link": "https://www.example.com/deals/macbook-air-m3",
        "date": "2025-02-26T10:45:00",
        "content": {
            "rendered": "<p>The latest MacBook Air with M3 chip offers incredible performance and battery life in an ultra-thin design. Features a 13.6-inch Liquid Retina display, 8GB RAM, and 256GB SSD storage.</p><p>Regular price: $1,099.99</p><p>Deal price: $899.99</p><p>You save: $200.00</p>"
        },
        "excerpt": {
            "rendered": "<p>MacBook Air with M3 chip, 13.6-inch Liquid Retina display, and all-day battery life. Save $200 - now only $899.99!</p>"
        },
    },
    {
        "id": 9,
        "title": {
            "rendered": "Kindle Paperwhite Signature Edition - 30% Off"
        },
        "link": "https://www.example.com/deals/kindle-paperwhite",
        "date": "2025-02-27T09:15:00",
        "content": {
            "rendered": "<p>The Kindle Paperwhite Signature Edition features a 6.8-inch display, wireless charging, auto-adjusting front light, and 32GB storage. Perfect for reading anywhere, anytime.</p><p>Regular price: $189.99</p><p>Deal price: $132.99</p><p>You save: $57.00</p>"
        },
        "excerpt": {
            "rendered": "<p>Kindle Paperwhite Signature Edition with 6.8-inch display, wireless charging, and 32GB storage. Now 30% off at $132.99!</p>"
        },
    },
    {
        "id": 10,
        "title": {
            "rendered": "LG C3 65\" OLED 4K Smart TV - $500 Off"
        },
        "link": "https://www.example.com/deals/lg-c3-oled",
        "date": "2025-02-25T13:00:00",
        "content": {
            "rendered": "<p>Experience stunning picture quality with the LG C3 65\" OLED 4K Smart TV. Features self-lit OLED pixels, Dolby Vision, Dolby Atmos, and NVIDIA G-SYNC for gaming.</p><p>Regular price: $1,799.99</p><p>Deal price: $1,299.99</p><p>You save: $500.00</p>"
        },
        "excerpt": {
            "rendered": "<p>LG C3 65\" OLED 4K Smart TV with self-lit pixels and Dolby Vision. Save $500 - now only $1,299.99!</p>"
        },
    },
]
def fetch_deals_data(
    url="https://www.dealsfinders.com/wp-json/wp/v2/posts",
    num_pages=2,
    per_page=100,
    use_sample_data=False,
    timeout=10,
):
    """Fetch raw deal posts from the DealsFinders API, or return sample data.

    Pages 1..``num_pages`` are requested in order. Fetching stops early when
    a page comes back with fewer than ``per_page`` items. Any failure (HTTP
    error status, network error, undecodable or non-list body) makes the
    function fall back to ``SAMPLE_DEALS``, as does an empty result.

    Args:
        url: Posts endpoint to query.
        num_pages: Maximum number of pages to request.
        per_page: Page size sent to the API.
        use_sample_data: When true, skip the network and return SAMPLE_DEALS.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of raw post dicts (API result or SAMPLE_DEALS).
    """
    if use_sample_data:
        print("Using sample deals data")
        return SAMPLE_DEALS

    # Loop-invariant: the API is queried with a browser-like User-Agent.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
    }

    all_deals = []

    for page in range(1, num_pages + 1):
        try:
            # ``params`` lets requests build/merge the query string, and the
            # timeout keeps a stalled server from hanging the caller forever.
            response = requests.get(
                url,
                params={"page": page, "per_page": per_page},
                headers=headers,
                timeout=timeout,
            )
            deals = response.json() if response.status_code == 200 else None
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            print("Falling back to sample deals data")
            return SAMPLE_DEALS

        if response.status_code != 200:
            print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
            print("Falling back to sample deals data")
            return SAMPLE_DEALS

        if not isinstance(deals, list):
            # A posts listing is expected to be a JSON array; anything else
            # would corrupt the result list if extended into it.
            print(f"Error fetching page {page} from DealsFinders API: unexpected response body")
            print("Falling back to sample deals data")
            return SAMPLE_DEALS

        all_deals.extend(deals)
        print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

        # A short page means there is nothing left to fetch.
        if len(deals) < per_page:
            print(f"Reached the end of available deals at page {page}")
            break

    if not all_deals:
        print("No deals fetched from API. Using sample deals data")
        return SAMPLE_DEALS

    return all_deals
def _rendered_field(deal, key):
    """Return ``deal[key]['rendered']`` or "" when the field is missing/None."""
    field = deal.get(key) or {}
    return field.get('rendered') or ''


def process_deals_data(deals_data):
    """Convert raw API posts into flat, plain-text deal records.

    Args:
        deals_data: Iterable of raw post dicts whose ``title``, ``content``
            and ``excerpt`` entries wrap HTML in a ``rendered`` key.
            ``None`` is treated as an empty list.

    Returns:
        A list of dicts with keys ``id``, ``title``, ``link``, ``date``,
        ``content`` and ``excerpt``. Entries that cannot be processed are
        reported on stdout and skipped.
    """
    processed_deals = []

    for deal in deals_data or []:
        try:
            processed_deals.append({
                'id': deal.get('id'),
                # The rendered title is HTML too (entities, occasional tags),
                # so it gets the same clean-up as content and excerpt.
                'title': extract_text_from_html(_rendered_field(deal, 'title')),
                'link': deal.get('link', ''),
                'date': deal.get('date', ''),
                'content': extract_text_from_html(_rendered_field(deal, 'content')),
                'excerpt': extract_text_from_html(_rendered_field(deal, 'excerpt')),
            })
        except (AttributeError, TypeError) as e:
            # Malformed entry (not a dict, or a field of the wrong type):
            # skip it rather than failing the whole batch.
            print(f"Error processing deal: {str(e)}")

    return processed_deals
# Category label -> natural-language description of what belongs to it.
category_descriptions = {
    "electronics": "Electronic devices like headphones, speakers, TVs, smartphones, and gadgets",
    "computers": "Laptops, desktops, computer parts, monitors, and computing accessories",
    "mobile": "Mobile phones, smartphones, phone cases, screen protectors, and chargers",
    "audio": "Headphones, earbuds, speakers, microphones, and audio equipment",
    "clothing": "Clothes, shirts, pants, dresses, and fashion items",
    "footwear": "Shoes, boots, sandals, slippers, and all types of footwear",
    "home": "Home decor, furniture, bedding, and household items",
    "kitchen": "Kitchen appliances, cookware, utensils, and kitchen gadgets",
    "toys": "Toys, games, and children's entertainment items",
    "sports": "Sports equipment, fitness gear, and outdoor recreation items",
    "beauty": "Beauty products, makeup, skincare, and personal care items",
    "books": "Books, e-books, audiobooks, and reading materials",
}

# Candidate labels, in the insertion order of the mapping above.
categories = list(category_descriptions.keys())
# Model set-up. Preferred path: a zero-shot classifier plus a sentence
# embedding model. If anything in that path fails (missing package, download
# error, ...), fall back to a sequence-classification model stored next to
# this file. ``using_recommended_models`` records which path is active.
try:
    from transformers import pipeline

    classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")
    print("Using facebook/bart-large-mnli for classification")

    from sentence_transformers import SentenceTransformer, util

    sentence_model = SentenceTransformer('sentence-transformers/all-mpnet-base-v2')
    print("Using sentence-transformers/all-mpnet-base-v2 for semantic search")

    # Embeddings of the category descriptions.
    category_texts = list(category_descriptions.values())
    category_embeddings = sentence_model.encode(category_texts, convert_to_tensor=True)

    using_recommended_models = True
except Exception as e:
    # Deliberately broad: any failure while importing or loading the
    # recommended models should trigger the local fallback.
    print(f"Error loading recommended models: {str(e)}")
    print("Falling back to local model")

    # The fallback model and tokenizer are loaded from this file's directory.
    model_path = os.path.dirname(os.path.abspath(__file__))
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    model = AutoModelForSequenceClassification.from_pretrained(model_path)

    # The label list for the local model is read from categories.json in the
    # same directory; a built-in default is used if it cannot be read.
    try:
        with open(os.path.join(model_path, "categories.json"), "r", encoding="utf-8") as f:
            categories = json.load(f)
    except (OSError, ValueError) as categories_error:
        print(f"Error loading categories: {str(categories_error)}")
        categories = ["electronics", "clothing", "home", "kitchen", "toys", "other"]

    using_recommended_models = False
# File used to persist processed deals between runs. The path is relative,
# so it resolves against the process's current working directory.
DEALS_DATA_PATH = "deals_data.json"
def fetch_and_save_deals(max_deals=10000, per_page=100, timeout=10):
    """Fetch up to ``max_deals`` deals, process them and save them to disk.

    Pages are requested from the DealsFinders posts endpoint until the
    requested number of deals is reached, a short page signals the end of the
    data, 100 pages have been requested, or a request fails. Whatever was
    collected up to that point is processed and written to
    ``DEALS_DATA_PATH`` as JSON.

    Args:
        max_deals: Upper bound on the number of deals kept.
        per_page: Page size sent to the API.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        The list of processed deals (returned even if saving fails).
    """
    print(f"Fetching up to {max_deals} deals...")

    all_deals = []
    # Ceiling division, capped at 100 pages.
    num_pages = min(-(-max_deals // per_page), 100)

    # Loop-invariant request headers (browser-like User-Agent).
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
    }

    for page in range(1, num_pages + 1):
        try:
            # Explicit timeout so a stalled server cannot block start-up
            # indefinitely.
            response = requests.get(
                "https://www.dealsfinders.com/wp-json/wp/v2/posts",
                params={"page": page, "per_page": per_page},
                headers=headers,
                timeout=timeout,
            )
            deals = response.json() if response.status_code == 200 else None
        except (requests.RequestException, ValueError) as e:
            print(f"Error fetching page {page} from DealsFinders API: {str(e)}")
            break

        if response.status_code != 200:
            print(f"Failed to fetch page {page} from DealsFinders API: {response.status_code}")
            break

        if not isinstance(deals, list):
            print(f"Error fetching page {page} from DealsFinders API: unexpected response body")
            break

        all_deals.extend(deals)
        print(f"Fetched page {page} with {len(deals)} deals from DealsFinders API")

        if len(all_deals) >= max_deals:
            print(f"Reached the maximum number of deals ({max_deals})")
            break

        # A short page means there is nothing left to fetch.
        if len(deals) < per_page:
            print(f"Reached the end of available deals at page {page}")
            break

    # Enforce the cap on every exit path. Previously a short final page left
    # the loop before truncation, so more than max_deals could be returned.
    all_deals = all_deals[:max(max_deals, 0)]

    processed_deals = process_deals_data(all_deals)

    try:
        with open(DEALS_DATA_PATH, "w", encoding="utf-8") as f:
            json.dump(processed_deals, f)
        print(f"Saved {len(processed_deals)} deals to {DEALS_DATA_PATH}")
    except (OSError, TypeError, ValueError) as e:
        # Saving is best-effort: the in-memory result is still usable.
        print(f"Error saving deals to file: {str(e)}")

    return processed_deals
def load_deals_from_file(path=None):
    """Load previously saved deals from a JSON file.

    Args:
        path: File to read. Defaults to ``DEALS_DATA_PATH`` when omitted.

    Returns:
        The list of deals stored in the file, or ``None`` when the file does
        not exist, cannot be read, is not valid JSON, or does not contain a
        JSON list.
    """
    if path is None:
        path = DEALS_DATA_PATH

    # Open directly instead of checking os.path.exists first: one system
    # call, and no window in which the file can vanish between check and use.
    try:
        with open(path, "r", encoding="utf-8") as f:
            deals = json.load(f)
    except FileNotFoundError:
        print(f"Deals file {path} does not exist")
        return None
    except (OSError, ValueError) as e:
        print(f"Error loading deals from file: {str(e)}")
        return None

    if not isinstance(deals, list):
        print("Error loading deals from file: expected a JSON list of deals")
        return None

    print(f"Loaded {len(deals)} deals from {path}")
    return deals
# In-memory list of processed deals shared by classify_text(). Populated at
# import time: first from the local file, otherwise by fetching from the API
# (which also writes the file). Left as None if initialisation fails.
deals_cache = None

try:
    deals_cache = load_deals_from_file()

    # Covers both "no usable file" (None) and "file held an empty list".
    if not deals_cache:
        print("No deals found in local file. Fetching deals...")
        deals_cache = fetch_and_save_deals()

    print(f"Initialized with {len(deals_cache) if deals_cache else 0} deals")
except Exception as e:
    # Start-up must not fail because deals are unavailable.
    print(f"Error initializing deals cache: {str(e)}")
    deals_cache = None
# Maximum number of deals listed in a response.
_MAX_RELEVANT_DEALS = 5

# Extra keyword-search terms added when the given category is the top match.
_CATEGORY_EXPANSIONS = {
    "kitchen": ['appliance', 'cookware', 'utensil', 'blender', 'mixer', 'toaster', 'microwave', 'oven'],
    "home": ['furniture', 'decor', 'decoration', 'bedding', 'household'],
    "clothing": ['clothes', 'shirt', 'pants', 'dress', 'fashion', 'wear', 'apparel'],
    "toys": ['game', 'play', 'children', 'kid', 'kids', 'fun'],
}


def _predict_categories(text):
    """Return ``(category, score)`` pairs for *text*, highest score first."""
    if using_recommended_models:
        prediction = classifier(text, categories, multi_label=True)
        # Only the first three returned labels are considered, and of those
        # only the ones scoring above 0.1 are kept.
        leading = list(zip(prediction['labels'], prediction['scores']))[:3]
        top_categories = [(label, score) for label, score in leading if score > 0.1]
    else:
        inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
        with torch.no_grad():
            outputs = model(**inputs)
        predictions = torch.sigmoid(outputs.logits)
        # zip() stops at the shorter sequence, so a label list shorter than
        # the model's output cannot cause an IndexError.
        top_categories = [
            (label, score.item())
            for label, score in zip(categories, predictions[0])
            if score > 0.5
        ]

    top_categories.sort(key=lambda pair: pair[1], reverse=True)
    return top_categories


def _semantic_search(text, deals, limit):
    """Return up to *limit* deals ranked by embedding similarity to *text*."""
    deal_texts = [f"{deal['title']} {deal['excerpt']}" for deal in deals]

    # NOTE(review): the deal embeddings are recomputed on every query; with a
    # large cache this dominates latency and is a candidate for caching.
    query_embedding = sentence_model.encode(text, convert_to_tensor=True)
    deal_embeddings = sentence_model.encode(deal_texts, convert_to_tensor=True)

    similarities = util.cos_sim(query_embedding, deal_embeddings)[0]
    top_indices = torch.topk(similarities, k=min(limit, len(deals))).indices
    return [deals[index] for index in top_indices.tolist()]


def _expand_query_terms(text, top_category):
    """Return extra search terms suggested by the top category and query."""
    lowered = text.lower()

    if top_category == "electronics":
        extra_terms = ['electronic', 'device', 'gadget', 'tech', 'technology']
        if any(term in lowered for term in ['headphone', 'headphones']):
            extra_terms.extend(['earbuds', 'earphones', 'earpods', 'airpods', 'audio', 'bluetooth', 'wireless'])
        elif any(term in lowered for term in ['laptop', 'computer']):
            extra_terms.extend(['notebook', 'macbook', 'chromebook', 'pc'])
        elif any(term in lowered for term in ['tv', 'television']):
            extra_terms.extend(['smart tv', 'roku', 'streaming'])
        return extra_terms

    return list(_CATEGORY_EXPANSIONS.get(top_category, []))


def _weighted_hits(term, weighted_fields):
    """Sum the weights of every field whose text contains *term*."""
    return sum(weight for field_text, weight in weighted_fields if term in field_text)


def _keyword_search(text, deals, top_category, limit):
    """Return up to *limit* deals ranked by weighted substring matches."""
    query_terms = text.lower().split()
    seen_terms = set(query_terms)
    extra_terms = [
        term for term in _expand_query_terms(text, top_category)
        if term not in seen_terms
    ]
    category_term = top_category.lower() if top_category else None

    scored_deals = []
    for deal in deals:
        title = (deal.get('title') or '').lower()
        content = (deal.get('content') or '').lower()
        excerpt = (deal.get('excerpt') or '').lower()

        score = 0
        # Words typed by the user weigh most, particularly in the title.
        for term in query_terms:
            score += _weighted_hits(term, ((title, 10), (content, 3), (excerpt, 3)))
        # Expansion terms count for less.
        for term in extra_terms:
            score += _weighted_hits(term, ((title, 5), (content, 1), (excerpt, 1)))
        # A literal mention of the category name is the strongest signal.
        if category_term:
            score += _weighted_hits(category_term, ((title, 15), (content, 5), (excerpt, 5)))

        if score > 0:
            scored_deals.append((deal, score))

    scored_deals.sort(key=lambda pair: pair[1], reverse=True)
    return [deal for deal, _ in scored_deals[:limit]]


def classify_text(text, fetch_deals=True):
    """Classify a shopping query and optionally list matching deals.

    Args:
        text: The user's shopping query.
        fetch_deals: When true, append a list of relevant deals.

    Returns:
        A Markdown string with the detected categories (and scores), a
        recommended category, and, if requested, up to five relevant deals.
        A blank or missing query yields a short prompt to enter one.
    """
    global deals_cache

    # Nothing meaningful can be classified from an empty query.
    if not text or not text.strip():
        return "Please enter a shopping query.\n\n"

    top_categories = _predict_categories(text)

    parts = []
    if top_categories:
        parts.append(f"Top categories for '{text}':\n\n")
        for category, score in top_categories:
            parts.append(f"- {category}: {score:.4f}\n")
        parts.append(f"\nBased on your query, I would recommend looking for deals in the **{top_categories[0][0]}** category.\n\n")
    else:
        parts.append(f"No categories found for '{text}'. Please try a different query.\n\n")

    if fetch_deals:
        parts.append("## Relevant Deals from DealsFinders.com\n\n")

        try:
            # Fall back to the bundled sample deals when the cache is missing
            # or empty (an empty list previously slipped past this check).
            if not deals_cache:
                deals_data = fetch_deals_data(num_pages=2, use_sample_data=True)
                deals_cache = process_deals_data(deals_data)

            if using_recommended_models:
                relevant_deals = _semantic_search(text, deals_cache, _MAX_RELEVANT_DEALS)
            else:
                top_category = top_categories[0][0] if top_categories else None
                relevant_deals = _keyword_search(text, deals_cache, top_category, _MAX_RELEVANT_DEALS)

            if relevant_deals:
                for position, deal in enumerate(relevant_deals, 1):
                    parts.append(f"{position}. [{deal['title']}]({deal['link']})\n\n")
            else:
                parts.append("No specific deals found for your query. Try a different search term or browse the recommended category.\n\n")
        except Exception as e:
            # UI boundary: report the problem in the response instead of
            # surfacing a traceback to the user.
            parts.append(f"Error fetching deals: {str(e)}\n\n")

    return "".join(parts)
# Gradio UI: a query text box and a "Fetch Deals" checkbox feed
# classify_text(); its Markdown result is rendered as the output.
demo = gr.Interface(
    fn=classify_text,
    inputs=[
        gr.Textbox(
            lines=2,
            placeholder="Enter your shopping query here...",
            label="Shopping Query",
        ),
        gr.Checkbox(
            label="Fetch Deals",
            value=True,
            info="Check to fetch and display deals from DealsFinders.com",
        ),
    ],
    outputs=gr.Markdown(label="Results"),
    title="Shopping Assistant",
    description=(
        "\n"
        "This demo shows how to use the Shopping Assistant model to classify shopping queries into categories and find relevant deals.\n"
        "Enter a shopping query below to see which categories it belongs to and find deals from DealsFinders.com.\n"
        "\n"
        "Examples:\n"
        "- \"I'm looking for headphones\"\n"
        "- \"Do you have any kitchen appliance deals?\"\n"
        "- \"Show me the best laptop deals\"\n"
        "- \"I need a new smart TV\"\n"
    ),
    # Each example supplies both inputs: the query and the checkbox state.
    examples=[
        ["I'm looking for headphones", True],
        ["Do you have any kitchen appliance deals?", True],
        ["Show me the best laptop deals", True],
        ["I need a new smart TV", True],
        ["headphone deals", True],
    ],
    theme=gr.themes.Soft(),
)
# Launch the web UI only when the file is executed as a script.
if __name__ == "__main__":
    demo.launch()