# abhisheky127's picture
# adding keyword crawling from google api in the flow
# ed3015b
import gradio as gr
import re
from transformers import pipeline
from googlesearch import search
import requests
from bs4 import BeautifulSoup
def get_google_description(keyword):
    """Return a web description for *keyword*, or *keyword* itself as fallback.

    Runs a Google search for the keyword (first result only, English) and
    returns the meta description of the first result page that has one.

    Args:
        keyword: Search phrase, typically the output of ``preprocess``.

    Returns:
        The page description string when one is found; otherwise the
        unchanged ``keyword`` (also when the keyword is blank or the lookup
        fails with a network/HTTP error).
    """
    # A blank query cannot produce a meaningful search; skip the network call.
    if not keyword or not keyword.strip():
        return keyword

    try:
        # Iterating the results is what performs the search request, so the
        # loop has to sit inside the try block.
        for result in search(keyword, num_results=1, lang='en'):
            description = get_description_from_url(result)
            if description:
                return description
    except requests.RequestException as exc:
        # The description is only an enrichment step: on rate limiting,
        # timeouts or connection errors fall back to the bare keyword.
        print(f"Google lookup failed for {keyword!r}: {exc}")
    return keyword
def get_description_from_url(url, timeout=10):
    """Fetch *url* and return the content of its ``<meta name="description">``.

    Args:
        url: Address of the page to download.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout ``requests.get`` may block indefinitely.

    Returns:
        The description text with surrounding whitespace removed, or ``None``
        when the request fails, the server answers with an error status, the
        page has no description meta tag, or the tag's content is empty.
    """
    try:
        response = requests.get(url, timeout=timeout)
        # An error page's description would describe the error, not the site.
        response.raise_for_status()
    except requests.RequestException as exc:
        print(f"Could not fetch {url!r}: {exc}")
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    description_tag = soup.find('meta', {'name': 'description'})
    if description_tag is None:
        return None

    content = description_tag.get('content')
    if not content:
        return None
    # Treat a whitespace-only description as missing so callers can rely on
    # a simple truthiness check.
    return content.strip() or None
# Text handed to gr.Interface as the page title.
title = "Fold: Contextual Tag Recommendation System"
# Text handed to gr.Interface as the page description.
description = "powered by bart-large-mnli, made by @abhisheky127"

# Zero-shot classification pipeline, created once at module load and reused
# by zero_shot() for every request.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
# Gradio handler: classify one transaction string against user-given labels.
def zero_shot(doc, candidates):
    """Score *doc* against the comma-separated labels in *candidates*.

    The text is cleaned with ``preprocess``, expanded into a web description
    with ``get_google_description`` and then passed to the zero-shot
    ``classifier`` together with the labels.

    Args:
        doc: Raw transaction text.
        candidates: Candidate labels as a single comma-separated string.

    Returns:
        A ``{label: score}`` dict built from the classifier's ``labels`` and
        ``scores``. An empty dict is returned when no non-blank label was
        supplied or when nothing is left of the text after preprocessing.
    """
    # Drop blank entries (empty input, trailing or doubled commas) so the
    # classifier is never asked to score an empty label.
    given_labels = [
        label.strip() for label in (candidates or "").split(",") if label.strip()
    ]
    if not given_labels:
        return {}

    doc = preprocess(doc or "")
    # Nothing to look up or classify once the noise has been removed.
    if not doc:
        return {}

    doc = get_google_description(doc)
    print(doc)
    dictionary = classifier(doc, given_labels)
    labels = dictionary['labels']
    scores = dictionary['scores']
    return dict(zip(labels, scores))
def preprocess(text):
    """Reduce a raw transaction string to a lowercase keyword phrase.

    Digits are deleted, every other character that is neither an ASCII
    letter nor whitespace becomes a space, whitespace runs are collapsed,
    and a fixed list of noise tokens is dropped (compared in upper case).

    Args:
        text: Raw transaction description.

    Returns:
        The remaining words, lowercased and joined by single spaces.
    """
    # Order matters: digits vanish without leaving a gap (so the letters
    # around them join up), while other symbols turn into word separators.
    substitutions = (
        (r'\d', ''),
        (r'[^a-zA-Z\s]', ' '),
        (r'\s+', ' '),
    )
    normalised = text
    for pattern, replacement in substitutions:
        normalised = re.sub(pattern, replacement, normalised)

    # Tokens that carry no merchant information; matched against the
    # upper-cased words.
    noise_tokens = {"MPS", "POS", "BIL", "ONL", "BANGALORE", "PVT", "LTD", "INDIA", "LT", "XXXXXXXXXXXX"}
    kept_words = [
        token
        for token in normalised.strip().upper().split()
        if token not in noise_tokens
    ]
    return " ".join(kept_words).lower()
# Gradio components used by the interface defined further down.
# First input: free-text box labelled "Text"; its value is passed to zero_shot as `doc`.
input1 = gr.Textbox(label="Text")
# Second input: text box labelled "Labels"; zero_shot splits its value on commas to get the candidate labels.
input2 = gr.Textbox(label="Labels")
# Output: label component that receives the {label: score} dict returned by zero_shot.
output = gr.Label(label="Output")
# Example rows for the interface: each row is [transaction text, candidate labels].
# Every example uses the same candidate label string.
_example_labels = "Medical, Food, Shopping, Subscription, Travel"
_example_transactions = (
    "MPS/TRUFFLES/202303261700/034587/Bangalore",
    "MPS/TACO BELL/202304012247/108300/BANGALORE",
    "POS XXXXXXXXXXXX0001 APOLLO PHARMACY",
    "BIL/ONL/000471093694/1MG Techno/X7ZRUSVLURFQZO",
    "POS XXXXXXXXXXXX1111 DECATHLON SPORTS",
    "POS XXXXXXXXXXXX1111 IKEA INDIA PVT L",
    "POS XXXXXXXXXXXX1111 WWW AMAZON IN",
    "ME DC SI XXXXXXXXXXXX1111 SPOTIFY SI",
    "POS/NETFLIX/1140920002/100623/17:25",
    "POS XXXXXXXXXXXX1110 MAKEMYTRIP INDIA",
    "BIL/ONL/000691178015/IRCTC Serv/XZZBX91LTCY1AZ",
)
transactions_and_tags = [
    [transaction, _example_labels] for transaction in _example_transactions
]
# Assemble the interface: zero_shot is the handler, fed by the two text boxes
# and rendering into the label output; the example rows are offered as presets.
gui = gr.Interface(
    fn=zero_shot,
    inputs=[input1, input2],
    outputs=[output],
    examples=transactions_and_tags,
    title=title,
    description=description,
)

# Start serving the interface (debug mode enabled).
gui.launch(debug=True)