"""Gradio demo that answers a fixed set of questions about an item description.

The description is cut into overlapping word windows ("phrases"). Each phrase
and each question is embedded with a sentence-transformers model, and for
every question the phrase with the highest cosine similarity is returned,
provided that similarity clears ``threshold``.
"""

import random
from typing import List, Optional, Sequence

import gradio as gr
import nltk
import numpy as np
import pandas as pd
import spacy
from bs4 import BeautifulSoup
from numpy.linalg import norm
from sentence_transformers import SentenceTransformer, util
from sklearn.metrics.pairwise import cosine_similarity

# NLTK resource downloads (disabled); uncomment if these corpora are needed.
# nltk.download('punkt')
# nltk.download('wordnet')
# nltk.download('omw-1.4')

# A phrase only counts as an answer when its cosine similarity to the
# question, rounded to two decimals, is strictly greater than this value.
threshold = 0.65

# Number of words in each sliding-window phrase cut from the description.
sentence_length = 6

# Questions asked of every description. The order here is also the order of
# the answers returned by find_answers_new() and of the output textboxes.
questions = [
    "Is it new or used",
    "Are there any wear & tear",
    "Does it come with dust bag, receipt & original box",
    "Are there any scratches, marks",
    "Are there any fading, stains, discolorization",
    "Is this item customized, repainted or has hardware been replaced",
    "Is it special edition",
    "Is there any odour",
    "Are there multiple items or extra add-ons in this listing?",
    "Is there a date code or serial number present on the item?",
]

# Text returned for a question when no phrase clears the threshold.
_NO_ANSWER = 'No answer'

model = SentenceTransformer("all-MiniLM-L6-v2")

# The questions never change, so embed them once at start-up instead of on
# every request. Each question is encoded on its own (not as one batch) so
# the vectors are produced exactly as a per-request encode would produce them.
_question_embeddings = [model.encode(question) for question in questions]


def generate_phrases(desc: str, length: int) -> List[str]:
    """Return every run of ``length`` consecutive words in ``desc``.

    Words are obtained with ``str.split()`` (any whitespace), and each phrase
    is the words of one window joined by single spaces. Windows advance one
    word at a time, so consecutive phrases overlap by ``length - 1`` words.

    Args:
        desc: Free-text description to cut into phrases.
        length: Number of words per phrase; must be at least 1.

    Returns:
        The phrases in the order they occur. When ``desc`` has fewer than
        ``length`` words, a single phrase holding all of its words (the
        empty string for a blank description).

    Raises:
        ValueError: If ``length`` is smaller than 1.
    """
    if length < 1:
        raise ValueError(f"length must be at least 1, got {length}")

    words = desc.split()
    if len(words) < length:
        # Too short for even one full window: use the whole text as one phrase.
        return [' '.join(words)]

    window_count = len(words) - length + 1
    return [
        ' '.join(words[start:start + length])
        for start in range(window_count)
    ]


def _best_phrase(phrases: Sequence[str],
                 scores: Sequence[float]) -> Optional[str]:
    """Return the highest-scoring phrase that clears ``threshold``, else None.

    A score qualifies when, rounded to two decimals, it is strictly greater
    than ``threshold``. Among qualifying phrases with equal scores, the one
    that occurs last wins.
    """
    best_phrase = None
    best_score = None
    for phrase, score in zip(phrases, scores):
        if not round(score, 2) > threshold:
            continue
        if best_score is not None and score < best_score:
            continue
        best_phrase, best_score = phrase, score
    return best_phrase


def find_answers_new(description: str) -> List[str]:
    """Answer each entry of ``questions`` from an item description.

    Args:
        description: Free-text item description entered by the user.

    Returns:
        One string per question, in the order of ``questions``: the
        best-matching phrase from the description, or ``'No answer'`` when no
        phrase is similar enough. A blank (or missing) description yields
        ``'No answer'`` for every question.
    """
    if not description or not description.strip():
        # Nothing to match against; skip the model call entirely.
        return [_NO_ANSWER] * len(questions)

    phrases = generate_phrases(description, sentence_length)
    phrase_embeddings = model.encode(phrases)

    answers = []
    for question_embedding in _question_embeddings:
        # cos_sim returns a 1 x len(phrases) matrix; row 0 holds the scores.
        similarities = util.cos_sim(question_embedding, phrase_embeddings)
        match = _best_phrase(phrases, similarities[0].tolist())
        answers.append(_NO_ANSWER if match is None else match)
    return answers


# NOTE(review): credentials are hard-coded in source while the app is launched
# with share=True below. Fine for a throwaway test account; load real
# credentials from configuration or the environment instead.
authorized_users = [
    ("test_user_account", "test_user_pwd"),
]

demo = gr.Interface(
    fn=find_answers_new,
    inputs=[
        gr.Textbox(lines=20, label="Item Description",
                   placeholder="Desc Here..."),
    ],
    # One output box per question, labelled with the question itself, so the
    # boxes always line up with the list returned by find_answers_new().
    outputs=[gr.Textbox(lines=1, label=question) for question in questions],
)

demo.launch(debug=False, share=True, auth=authorized_users)