"""Gradio demo: zero-shot tag recommendation for transaction narrations.

A raw narration string (e.g. ``"POS XXXXXXXXXXXX0001 APOLLO PHARMACY"``) is
cleaned by :func:`preprocess` and scored against a comma-separated list of
candidate tags with the ``facebook/bart-large-mnli`` zero-shot pipeline.
"""

import re

import gradio as gr
import requests
from bs4 import BeautifulSoup
from transformers import pipeline

# from googlesearch import search

# Patterns used by preprocess(); compiled once instead of on every request.
_DIGITS = re.compile(r"\d")
_NON_LETTERS = re.compile(r"[^a-zA-Z\s]")

# Upper-case tokens stripped from narrations before classification.
_NOISE_WORDS = frozenset(
    {
        "MPS",
        "POS",
        "BIL",
        "ONL",
        "BANGALORE",
        "PVT",
        "LTD",
        "INDIA",
        "LT",
        "XXXXXXXXXXXX",
    }
)


def get_google_description(keyword):
    """Return the meta description of the first search hit for *keyword*.

    Falls back to returning *keyword* unchanged when no hit yields a
    description. A hit whose page cannot be fetched is skipped rather than
    aborting the whole lookup.

    NOTE(review): ``search`` is expected to come from the ``googlesearch``
    package, whose import is commented out at the top of this file. Until
    that import is restored, calling this function raises ``NameError``.
    The function is currently unused (see the note in ``preprocess``).
    """
    results = search(keyword, num_results=1, lang="en")
    for result in results:
        try:
            description = get_description_from_url(result)
        except requests.RequestException:
            # Best-effort enrichment: an unreachable page is not fatal.
            continue
        if description:
            return description
    return keyword


def get_description_from_url(url):
    """Fetch *url* and return its ``<meta name="description">`` content.

    Returns ``None`` when the page has no such tag (or the tag has no
    ``content`` attribute). Network failures from ``requests.get`` propagate
    to the caller.
    """
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, "html.parser")
    description_tag = soup.find("meta", {"name": "description"})
    if description_tag:
        return description_tag.get("content")
    return None


title = "Fold: Contextual Tag Recommendation System"
description = "powered by bart-large-mnli, made by @abhisheky127"

classifier = pipeline("zero-shot-classification", model="facebook/bart-large-mnli")


def zero_shot(doc, candidates):
    """Score *doc* against the comma-separated tags in *candidates*.

    Args:
        doc: Raw narration text; it is cleaned with ``preprocess`` first.
        candidates: Comma-separated candidate tags. Surrounding whitespace
            is stripped and blank entries (e.g. from a trailing comma) are
            ignored.

    Returns:
        A ``{tag: score}`` dict built from the classifier's ``labels`` and
        ``scores``, or an empty dict when no usable tag was supplied.
    """
    stripped = (label.strip() for label in (candidates or "").split(","))
    given_labels = [label for label in stripped if label]

    doc = preprocess(doc)
    print(doc)

    # Blank labels would otherwise be scored as real tags; with none left
    # there is nothing to classify against.
    if not given_labels:
        return {}

    result = classifier(doc, given_labels)
    return dict(zip(result["labels"], result["scores"]))


def preprocess(text):
    """Normalise a narration to lower-case words with noise removed.

    Digits are deleted outright, every other non-letter character becomes a
    space, tokens listed in ``_NOISE_WORDS`` are dropped (compared in upper
    case), and the remaining words are joined with single spaces and
    lower-cased. ``None`` or an empty string yields ``""``.
    """
    if not text:
        return ""

    # Digits are removed (not replaced by a space) so that masked numbers
    # such as "XXXXXXXXXXXX0001" collapse to a token that is filtered below.
    letters_only = _NON_LETTERS.sub(" ", _DIGITS.sub("", text))

    # split()/join also collapses runs of whitespace and trims the ends.
    kept_words = [
        word for word in letters_only.upper().split() if word not in _NOISE_WORDS
    ]

    # Optional enrichment via get_google_description() is disabled here.
    return " ".join(kept_words).lower()


# Input components: the narration text and the comma-separated labels.
input1 = gr.Textbox(label="Text")
input2 = gr.Textbox(label="Labels")

# Output component: label/confidence view of the returned dict.
output = gr.Label(label="Output")

# Example rows shown in the UI; every example uses the same label set.
_EXAMPLE_LABELS = "Medical, Food, Shopping, Subscription, Travel"
transactions_and_tags = [
    [narration, _EXAMPLE_LABELS]
    for narration in (
        "MPS/TRUFFLES/202303261700/034587/Bangalore",
        "MPS/TACO BELL/202304012247/108300/BANGALORE",
        "POS XXXXXXXXXXXX0001 APOLLO PHARMACY",
        "BIL/ONL/000471093694/1MG Techno/X7ZRUSVLURFQZO",
        "POS XXXXXXXXXXXX1111 DECATHLON SPORTS",
        "POS XXXXXXXXXXXX1111 WWW AMAZON IN",
        "ME DC SI XXXXXXXXXXXX1111 SPOTIFY SI",
        "POS/NETFLIX/1140920002/100623/17:25",
        "POS XXXXXXXXXXXX1110 MAKEMYTRIP INDIA",
        "BIL/ONL/000691178015/IRCTC Serv/XZZBX91LTCY1AZ",
    )
]

# Build the interface.
gui = gr.Interface(
    title=title,
    description=description,
    fn=zero_shot,
    inputs=[input1, input2],
    outputs=[output],
    examples=transactions_and_tags,
)

# Launch only when executed as a script so importing this module (e.g. for
# testing) does not start a server.
if __name__ == "__main__":
    gui.launch(debug=True)