File size: 3,881 Bytes
1308769 059b923 1308769 80aea0d 1be780a ae9e0c1 01166ba 3cedfb4 1308769 620ca5b 53a8c9e 620ca5b 53a8c9e 620ca5b 53a8c9e 620ca5b 53a8c9e 620ca5b 53a8c9e 620ca5b 53a8c9e bcc9686 53a8c9e 1308769 ae9e0c1 bc5bb7e 947bf26 1308769 80aea0d 1308769 bc3c411 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import gradio as gr
import re
from transformers import pipeline
# Heading and sub-heading text handed to the Gradio interface.
title = "Fold: Contextual Tag Recommendation System"
description = "powered by bart-large-mnli, made by @abhisheky127"

# Zero-shot classification pipeline, built once when the module is loaded
# and reused by zero_shot() for every request.
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)
#define a function to process your input and output
def zero_shot(doc, candidates):
    """Score a transaction description against comma-separated candidate labels.

    Args:
        doc: Raw transaction text. It is cleaned with ``preprocess`` before
            being passed to the classifier.
        candidates: Candidate labels as a single comma-separated string,
            e.g. ``"Medical, Food, Travel"``. Whitespace around each label
            is ignored; blank entries (``"a,,b"``, a trailing comma) and
            repeated labels are dropped.

    Returns:
        A dict mapping each label to the score the classifier reported
        for it.

    Raises:
        ValueError: If ``candidates`` contains no non-blank label.
    """
    stripped = (label.strip() for label in candidates.split(","))
    # Drop blanks so a stray comma is not scored as an empty label, and
    # de-duplicate (keeping first-seen order) so a repeated label is not
    # scored twice and then collapsed by the dict below.
    given_labels = list(dict.fromkeys(label for label in stripped if label))
    if not given_labels:
        raise ValueError("At least one non-empty candidate label is required.")

    doc = preprocess(doc)
    print(doc)  # echo the cleaned text to the console
    result = classifier(doc, given_labels)
    return dict(zip(result['labels'], result['scores']))
#define a function to preprocess transaction query
# NOTE: the two commented-out preprocess() variants below are superseded by
# the active definition that follows them; they are not executed.
# def preprocess(transaction):
#     pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
#     match = re.search(pattern, transaction)
#     if match:
#         return match.group(1).strip()
#     return None
# def preprocess(transaction):
#     remove_words = ["pos", "mps", "bil", "onl"]
#     # Convert to lowercase
#     transaction = transaction.lower()
#     # Remove unwanted words
#     for word in remove_words:
#         transaction = transaction.replace(word, "")
#     # Remove special characters and digits
#     transaction = re.sub(r"[^a-z\s]+", "", transaction)
#     # Remove extra spaces
#     transaction = re.sub(r"\s+", " ", transaction).strip()
#     return transaction
def preprocess(text):
    """Reduce a raw transaction string to lowercase keyword text.

    Digits are deleted, every character that is neither an ASCII letter nor
    whitespace becomes a space, whitespace runs are squeezed to one space,
    and a fixed set of tokens is filtered out (matched case-insensitively).

    Args:
        text: The raw transaction string.

    Returns:
        The remaining tokens, lowercased and joined by single spaces; an
        empty string when nothing is left.

    Example:
        >>> preprocess("MPS/TRUFFLES/202303261700/034587/Bangalore")
        'truffles'
    """
    # Tokens discarded from the result; compared against upper-cased text.
    noise_tokens = ["MPS", "POS", "BIL", "ONL", "BANGALORE", "PVT", "LTD", "INDIA", "LT"]

    # Digits vanish outright -- no separator is left in their place, so
    # "1MG" becomes "MG".
    letters = re.sub(r'\d', '', text)
    # Punctuation and other symbols act as token separators.
    letters = re.sub(r'[^a-zA-Z\s]', ' ', letters)
    # Squeeze whitespace, trim the ends, and upper-case for matching.
    normalised = re.sub(r'\s+', ' ', letters).strip().upper()

    # NOTE(review): letter-only runs such as "XXXXXXXXXXXX" are not in the
    # list above and therefore pass through unchanged.
    kept = []
    for token in normalised.split():
        if token in noise_tokens:
            continue
        kept.append(token)

    return " ".join(kept).lower()
# Gradio components wired into the interface created further down.
# First input: text box labelled "Text" (listed first in the interface inputs).
input1 = gr.Textbox(label="Text")
# Second input: text box labelled "Labels" (listed second in the interface inputs).
input2 = gr.Textbox(label="Labels")
# Output: label component that displays the value returned by the interface function.
output = gr.Label(label="Output")
# Example rows for the interface: every row pairs one raw transaction string
# with the same comma-separated list of candidate tags.
_EXAMPLE_TAGS = "Medical, Food, Shopping, Subscription, Travel, Miscellaneous"
transactions_and_tags = [
    [raw_transaction, _EXAMPLE_TAGS]
    for raw_transaction in (
        "MPS/TRUFFLES/202303261700/034587/Bangalore",
        "MPS/TACO BELL/202304012247/108300/BANGALORE",
        "POS XXXXXXXXXXXX0001 APOLLO PHARMACY",
        "BIL/ONL/000471093694/1MG Techno/X7ZRUSVLURFQZO",
        "POS XXXXXXXXXXXX1111 DECATHLON SPORTS",
        "POS XXXXXXXXXXXX1111 IKEA INDIA PVT L",
        "POS XXXXXXXXXXXX1111 WWW AMAZON IN",
        "ME DC SI XXXXXXXXXXXX1111 SPOTIFY SI",
        "POS/NETFLIX/1140920002/100623/17:25",
        "POS XXXXXXXXXXXX1110 MAKEMYTRIP INDIA",
        "BIL/ONL/000691178015/IRCTC Serv/XZZBX91LTCY1AZ",
    )
]
# Build the web interface: zero_shot receives the values of the two input
# text boxes (in the order listed) and its return value is shown in the
# output label component. The example rows are offered as sample inputs.
gui = gr.Interface(
    fn=zero_shot,
    inputs=[input1, input2],
    outputs=[output],
    examples=transactions_and_tags,
    title=title,
    description=description,
)

# Start serving the interface; debug=True is forwarded to Gradio's launch().
# (The stray trailing "|" that followed this call was removed -- it left the
# statement syntactically incomplete.)
gui.launch(debug=True)