File size: 3,855 Bytes
1308769 059b923 1308769 80aea0d 1be780a ae9e0c1 3cedfb4 1308769 620ca5b 53a8c9e 620ca5b 53a8c9e 620ca5b 53a8c9e 620ca5b 53a8c9e 620ca5b 53a8c9e 620ca5b 53a8c9e bcc9686 53a8c9e 1308769 ae9e0c1 947bf26 1308769 80aea0d 1308769 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import gradio as gr
import re
from transformers import pipeline
# Text shown at the top of the Gradio page.
title = "Fold: Contextual Tag Recommendation System"
description = "powered by bart-large-mnli, made by @abhisheky127"

# Zero-shot classification pipeline, created once at import time and reused
# for every request handled by zero_shot().
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
# Gradio callback: score a transaction description against user-supplied tags.
def zero_shot(doc, candidates):
    """Rank comma-separated candidate labels for a transaction description.

    Args:
        doc: Raw transaction text. It is cleaned with ``preprocess`` before
            being handed to the classifier. ``None`` is treated as empty.
        candidates: Comma-separated label names, e.g. ``"Food, Travel"``.
            ``None`` is treated as empty.

    Returns:
        A dict mapping each label to the score reported by ``classifier``.
        The dict is empty when no usable label was supplied or when nothing
        is left of ``doc`` after preprocessing.
    """
    # Drop blank entries (empty box, trailing comma, "a,,b") so the model is
    # never asked to score an empty label, and collapse duplicates while
    # keeping first-seen order (the returned dict could not hold them anyway).
    stripped = (label.strip() for label in (candidates or "").split(","))
    given_labels = list(dict.fromkeys(label for label in stripped if label))
    if not given_labels:
        return {}

    doc = preprocess(doc or "")
    if not doc:
        # Only digits, punctuation or noise words were entered: there is no
        # text left to classify.
        return {}

    dictionary = classifier(doc, given_labels)
    return dict(zip(dictionary['labels'], dictionary['scores']))
# Earlier drafts of the transaction preprocessing function, left commented out for reference:
# def preprocess(transaction):
# pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
# match = re.search(pattern, transaction)
# if match:
# return match.group(1).strip()
# return None
# def preprocess(transaction):
# remove_words = ["pos", "mps", "bil", "onl"]
# # Convert to lowercase
# transaction = transaction.lower()
# # Remove unwanted words
# for word in remove_words:
# transaction = transaction.replace(word, "")
# # Remove special characters and digits
# transaction = re.sub(r"[^a-z\s]+", "", transaction)
# # Remove extra spaces
# transaction = re.sub(r"\s+", " ", transaction).strip()
# return transaction
def preprocess(text, words_to_remove=None):
    """Reduce a raw transaction string to lowercase keyword text.

    Digits are deleted outright (so ``"1MG"`` becomes ``"MG"``), every other
    character that is not an ASCII letter or whitespace is replaced by a
    space, and the remaining tokens are lowercased and filtered against a
    list of noise words.

    Args:
        text: Raw transaction description. ``None`` or an empty string
            yields ``""``.
        words_to_remove: Optional iterable of tokens to discard, matched
            case-insensitively. Defaults to the built-in list of channel
            codes and location/company words.

    Returns:
        The remaining tokens, lowercased and joined by single spaces. The
        result may be an empty string.
    """
    if not text:
        return ""

    if words_to_remove is None:
        words_to_remove = ("MPS", "POS", "BIL", "ONL", "BANGALORE", "PVT", "LTD", "INDIA", "LT")
    # A set gives O(1) membership tests; lowercasing both sides keeps the
    # comparison case-insensitive with a single case conversion of the text.
    blocked = {word.lower() for word in words_to_remove}

    # Remove digits without inserting a separator.
    cleaned_text = re.sub(r'\d', '', text)
    # Turn everything except ASCII letters and whitespace into spaces.
    cleaned_text = re.sub(r'[^a-zA-Z\s]', ' ', cleaned_text)
    # split() with no argument already collapses whitespace runs and ignores
    # leading/trailing whitespace, so no separate collapse step is needed.
    tokens = cleaned_text.lower().split()
    return " ".join(token for token in tokens if token not in blocked)
# Gradio components for the interface: two text inputs (the transaction text
# and the comma-separated candidate labels) and one label output that shows
# the per-label scores.
input1, input2 = gr.Textbox(label="Text"), gr.Textbox(label="Labels")
output = gr.Label(label="Output")
# Example rows offered in the UI: each row is [transaction text, labels].
# Every example uses the same candidate label string.
_EXAMPLE_LABELS = "Medical, Food, Shopping, Subscription, Travel, Miscellaneous"
_EXAMPLE_TRANSACTIONS = (
    "MPS/TRUFFLES/202303261700/034587/Bangalore",
    "MPS/TACO BELL/202304012247/108300/BANGALORE",
    "POS XXXXXXXXXXXX0001APOLLO PHARMACY",
    "BIL/ONL/000471093694/1MG Techno/X7ZRUSVLURFQZO",
    "POS XXXXXXXXXXXX1111 DECATHLON SPORTS",
    "POS XXXXXXXXXXXX1111 IKEA INDIA PVT L",
    "POS XXXXXXXXXXXX1111 WWW AMAZON IN",
    "ME DC SI XXXXXXXXXXXX1111 SPOTIFY SI",
    "POS/NETFLIX/1140920002/100623/17:25",
    "POS XXXXXXXXXXXX1110 MAKEMYTRIP INDIA",
    "BIL/ONL/000691178015/IRCTC Serv/XZZBX91LTCY1AZ",
)
transactions_and_tags = [
    [transaction, _EXAMPLE_LABELS] for transaction in _EXAMPLE_TRANSACTIONS
]
# Assemble the interface: zero_shot() receives the two text inputs and its
# returned label -> score mapping is rendered by the label output. The example
# rows are offered as ready-made inputs.
gui = gr.Interface(
    title=title,
    description=description,
    fn=zero_shot,
    inputs=[input1, input2],
    outputs=[output],
    examples=transactions_and_tags,
)

# Start serving the interface.
gui.launch()