# QueryAnalyzerV2 / app.py
# DINGOLANI's picture
# Update app.py
# 754152d verified
import gradio as gr
import torch
import re
from transformers import AutoTokenizer, AutoModelForTokenClassification, pipeline
# Hugging Face checkpoint used for tagging luxury-fashion entities.
model_name = "AkimfromParis/NER-Luxury"

# Tokenizer and token-classification weights are both loaded from that checkpoint.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTokenClassification.from_pretrained(model_name)

# Token-classification ("ner") pipeline built from the two objects above.
ner_pipeline = pipeline(
    task="ner",
    model=model,
    tokenizer=tokenizer,
)
# Price regex, case-insensitive. The pieces below concatenate into one pattern
# with three capture groups: (comparison word, amount, currency).
price_pattern = re.compile(
    r'(\bunder\b|\babove\b|\bbelow\b|\bbetween\b)?'  # group 1: optional comparison word
    r'\s?(\d{1,5})'                                   # group 2: amount, 1 to 5 digits
    r'\s?(AED|USD|EUR)?',                             # group 3: optional currency code
    flags=re.IGNORECASE,
)

# Gender terms searched for in the query; the first listed term that is found wins.
gender_keywords = [
    "men",
    "male",
    "women",
    "female",
    "unisex",
]
def _field_for_label(entity_label):
    """Map a NER label to the output field it fills, or None if the label is unused."""
    if "HOUSE" in entity_label or "BRAND" in entity_label:  # Luxury brands
        return "Brand"
    if "CATEGORY" in entity_label:  # Fashion categories
        return "Category"
    if "MONETARYVALUE" in entity_label:  # Price values
        return "Price"
    return None


def extract_attributes(query):
    """
    Extract structured fashion attributes dynamically using the fine-tuned NER-Luxury model.

    Args:
        query: Free-text search query. ``None``, empty and whitespace-only
            queries are accepted and yield the all-"Unknown" result.

    Returns:
        dict with the keys "Brand", "Category", "Gender" and "Price". Each value
        is the extracted text, or "Unknown" when nothing was found.
        Brand/Category/Price come from the NER pipeline; Gender comes from
        ``gender_keywords``; Price falls back to ``price_pattern`` when the
        model tagged no price.
    """
    structured_output = {"Brand": "Unknown", "Category": "Unknown", "Gender": "Unknown", "Price": "Unknown"}

    # Nothing to analyse: return the defaults without calling the model.
    if not query or not query.strip():
        return structured_output

    # Run NER model on query
    entities = ner_pipeline(query)

    # The pipeline is created without an aggregation strategy, so it yields one
    # entry per token / word-piece. Consecutive pieces of the same entity are
    # stitched back together here; otherwise a brand such as "Louis Vuitton"
    # or "gu" + "##cci" would be reduced to its last fragment.
    previous_field = None
    previous_index = None
    for entity in entities:
        entity_label = entity["entity"]
        raw_word = entity["word"]
        entity_text = raw_word.replace("##", "")  # Fix tokenization artifacts
        field = _field_for_label(entity_label)
        token_index = entity.get("index")  # token position; may be absent

        if field is not None:
            is_subword = raw_word.startswith("##")
            is_adjacent = (
                previous_index is not None
                and token_index is not None
                and token_index == previous_index + 1
            )
            # A "B-" label opens a new entity, so it never extends the
            # previous one unless it is a word-piece of the same word.
            continues_previous = (
                field == previous_field
                and is_adjacent
                and (is_subword or not entity_label.startswith("B-"))
            )
            if not continues_previous:
                # New entity: as before, the latest entity of a kind wins.
                structured_output[field] = entity_text
            elif is_subword:
                structured_output[field] += entity_text
            else:
                structured_output[field] += " " + entity_text

        previous_field, previous_index = field, token_index

    # Extract gender. Whole-word matching is required: with a plain substring
    # test "women" contains "men" and "female" contains "male", so women's
    # queries were reported as "Men". The optional "s" keeps "mens"/"womens"
    # working; possessives ("men's", "men’s") already end at a word boundary.
    lowered_query = query.lower()
    for gender in gender_keywords:
        if re.search(rf"\b{re.escape(gender)}s?\b", lowered_query):
            structured_output["Gender"] = gender.capitalize()
            break

    # Extract price if not found by NER
    if structured_output["Price"] == "Unknown":
        price_matches = list(price_pattern.finditer(query))
        # Prefer a number that comes with a comparison word or a currency, so
        # "No 5 perfume under 200 AED" yields "Under 200 AED" rather than "5 AED".
        # Otherwise fall back to the first number in the query.
        price_match = next(
            (m for m in price_matches if m.group(1) or m.group(3)),
            price_matches[0] if price_matches else None,
        )
        if price_match:
            condition, amount, currency = price_match.groups()
            price_parts = [
                condition.capitalize() if condition else "",
                amount,
                currency.upper() if currency else "AED",  # AED is the default currency
            ]
            structured_output["Price"] = " ".join(price_parts).strip()

    return structured_output
# Callback invoked by the Gradio "Parse Query" button
def parse_query(user_query):
    """
    Turn a fashion-related query into its structured attributes.

    Thin wrapper: passes ``user_query`` to ``extract_attributes`` and returns
    that result unchanged.
    """
    return extract_attributes(user_query)  # JSON output
# Gradio Blocks layout: title, query textbox, JSON result box, and a button
# whose click sends the textbox content through parse_query.
with gr.Blocks() as demo:
    gr.Markdown("# 🛍️ Luxury Fashion Query Parser using NER-Luxury")
    query_input = gr.Textbox(
        label="Enter your search query",
        placeholder="e.g., Gucci men’s perfume under 200AED",
    )
    output_box = gr.JSON(label="Parsed Output")
    parse_button = gr.Button("Parse Query")
    parse_button.click(
        fn=parse_query,
        inputs=[query_input],
        outputs=[output_box],
    )

# Start the web app.
demo.launch()