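"""Fold: Contextual Tag Recommendation System.

A Gradio app that cleans raw bank-transaction strings and scores them against
user-supplied candidate tags via zero-shot classification with
facebook/bart-large-mnli.
"""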
import gradio as gr
import re
from transformers import pipeline
from googlesearch import search
import requests
from bs4 import BeautifulSoup
def get_google_description(keyword):
    """Search Google for the keyword and return the first result's meta description.

    Falls back to the original keyword if no description can be fetched.
    """
    results = search(keyword, num_results=1, lang='en')
    for result in results:
        description = get_description_from_url(result)
        if description:
            return description
    return keyword
def get_description_from_url(url):
    """Fetch a page and return its <meta name="description"> content, or None."""
    response = requests.get(url, timeout=10)
    soup = BeautifulSoup(response.text, 'html.parser')
    description_tag = soup.find('meta', {'name': 'description'})
    if description_tag:
        return description_tag.get('content')
    return None
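# Note: the two helpers above are only exercised when the
# get_google_description() call inside preprocess() below is uncommented;
# the app runs without them.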
title = "Fold: Contextual Tag Recommendation System"
description = "powered by bart-large-mnli, made by @abhisheky127"
classifier = pipeline("zero-shot-classification",
                      model="facebook/bart-large-mnli")
# Define a function that maps the two text inputs to a label -> score dict
def zero_shot(doc, candidates):
    # Candidate labels arrive as a comma-separated string
    given_labels = [label.strip() for label in candidates.split(",")]
    doc = preprocess(doc)
    # Log the cleaned text for debugging
    print(doc)
    result = classifier(doc, given_labels)
    labels = result['labels']
    scores = result['scores']
    return dict(zip(labels, scores))
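# Illustrative call (scores are model-dependent; the numbers below are only a sketch):
#   zero_shot("POS XXXXXXXXXXXX0001 APOLLO PHARMACY",
#             "Medical, Food, Shopping, Subscription, Travel")
#   -> {"Medical": 0.92, "Shopping": 0.04, "Food": 0.02, "Subscription": 0.01, "Travel": 0.01}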
def preprocess(text):
    # Remove digits
    cleaned_text = re.sub(r'\d', '', text)
    # Remove special characters except spaces and letters
    cleaned_text = re.sub(r'[^a-zA-Z\s]', ' ', cleaned_text)
    # Collapse extra whitespace
    cleaned_text = re.sub(r'\s+', ' ', cleaned_text).strip()
    # Convert to uppercase so the token removal below is case-insensitive
    cleaned_text = cleaned_text.upper()
    # Remove transaction boilerplate tokens
    words_to_remove = ["MPS", "POS", "BIL", "ONL", "BANGALORE", "PVT", "LTD", "INDIA", "LT", "XXXXXXXXXXXX"]
    cleaned_text = " ".join([word for word in cleaned_text.split() if word not in words_to_remove])
    # Convert to lowercase before classification
    cleaned_text = cleaned_text.lower()
    # Optionally enrich the text with a Google meta description
    # cleaned_text = get_google_description(cleaned_text)
    return cleaned_text
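# Illustrative example (no new behavior): the first demo transaction
# "MPS/TRUFFLES/202303261700/034587/Bangalore" is reduced by preprocess()
# to "truffles" before it reaches the classifier.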
# Create the Gradio input and output components
# Input 1: raw transaction text
input1 = gr.Textbox(label="Text")
# Input 2: comma-separated candidate tags
input2 = gr.Textbox(label="Labels")
# Output: label -> confidence scores
output = gr.Label(label="Output")
# Example transactions paired with the candidate tag list
transactions_and_tags = [
    ["MPS/TRUFFLES/202303261700/034587/Bangalore", "Medical, Food, Shopping, Subscription, Travel"],
    ["MPS/TACO BELL/202304012247/108300/BANGALORE", "Medical, Food, Shopping, Subscription, Travel"],
    ["POS XXXXXXXXXXXX0001 APOLLO PHARMACY", "Medical, Food, Shopping, Subscription, Travel"],
    ["BIL/ONL/000471093694/1MG Techno/X7ZRUSVLURFQZO", "Medical, Food, Shopping, Subscription, Travel"],
    ["POS XXXXXXXXXXXX1111 DECATHLON SPORTS", "Medical, Food, Shopping, Subscription, Travel"],
    ["POS XXXXXXXXXXXX1111 IKEA INDIA PVT L", "Medical, Food, Shopping, Subscription, Travel"],
    ["POS XXXXXXXXXXXX1111 WWW AMAZON IN", "Medical, Food, Shopping, Subscription, Travel"],
    ["ME DC SI XXXXXXXXXXXX1111 SPOTIFY SI", "Medical, Food, Shopping, Subscription, Travel"],
    ["POS/NETFLIX/1140920002/100623/17:25", "Medical, Food, Shopping, Subscription, Travel"],
    ["POS XXXXXXXXXXXX1110 MAKEMYTRIP INDIA", "Medical, Food, Shopping, Subscription, Travel"],
    ["BIL/ONL/000691178015/IRCTC Serv/XZZBX91LTCY1AZ", "Medical, Food, Shopping, Subscription, Travel"]
]
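# Each example row pre-fills the Text and Labels inputs in the interface below.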
# Create the interface
gui = gr.Interface(title=title,
                   description=description,
                   fn=zero_shot,
                   inputs=[input1, input2],
                   outputs=[output],
                   examples=transactions_and_tags)
# Display the interface
gui.launch(debug=True)