File size: 3,881 Bytes
1308769
059b923
1308769
 
 
 
 
 
 
 
 
 
80aea0d
1be780a
ae9e0c1
01166ba
3cedfb4
1308769
 
 
 
 
620ca5b
 
 
 
 
 
 
53a8c9e
 
620ca5b
53a8c9e
 
620ca5b
53a8c9e
 
 
620ca5b
53a8c9e
 
620ca5b
53a8c9e
 
 
 
 
 
 
 
 
 
 
620ca5b
53a8c9e
 
 
 
 
 
bcc9686
53a8c9e
 
 
 
 
 
1308769
 
 
 
 
 
 
 
 
 
 
 
 
 
ae9e0c1
 
bc5bb7e
947bf26
 
 
 
 
 
 
 
1308769
 
 
 
 
 
80aea0d
1308769
 
 
 
bc3c411
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import gradio as gr
import re
from transformers import pipeline

# Heading and sub-heading text handed to the Gradio interface.
title = "Fold: Contextual Tag Recommendation System"
description = "powered by bart-large-mnli, made by @abhisheky127"

# Zero-shot classification pipeline, created once when the module loads and
# shared by every call to zero_shot().
classifier = pipeline(
    task="zero-shot-classification",
    model="facebook/bart-large-mnli",
)

#define a function to process your input and output
def zero_shot(doc, candidates):
    """Score a transaction description against user-supplied candidate tags.

    Args:
        doc: Raw transaction text. It is cleaned with ``preprocess`` before
            being classified. ``None`` is treated as an empty string.
        candidates: Comma-separated candidate labels, e.g. ``"Food, Travel"``.
            Whitespace around each label is stripped, blank entries are
            ignored and repeated labels are collapsed (first occurrence wins).
            ``None`` is treated as an empty string.

    Returns:
        A dict mapping each candidate label to the score reported by the
        classifier. An empty dict is returned when no usable label was given
        or when nothing is left of ``doc`` after preprocessing.
    """
    # Blank entries (empty box, trailing or doubled commas) would otherwise be
    # sent to the model as real candidates; dict.fromkeys removes duplicates
    # while keeping the order the user typed.
    given_labels = list(dict.fromkeys(
        label
        for label in map(str.strip, (candidates or "").split(","))
        if label
    ))
    if not given_labels:
        return {}

    doc = preprocess(doc or "")
    # Echo the cleaned text to stdout for debugging.
    print(doc)
    if not doc:
        # Nothing but digits/symbols/noise words: there is no text to score.
        return {}

    dictionary = classifier(doc, given_labels)
    return dict(zip(dictionary['labels'], dictionary['scores']))

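# Preprocess the transaction query. The two commented-out functions below are
# earlier drafts kept for reference; the active implementation follows them.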
# def preprocess(transaction):
#     pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
#     match = re.search(pattern, transaction)
#     if match:
#         return match.group(1).strip()
#     return None

# def preprocess(transaction):
#     remove_words = ["pos", "mps", "bil", "onl"]

#     # Convert to lowercase
#     transaction = transaction.lower()

#     # Remove unwanted words
#     for word in remove_words:
#         transaction = transaction.replace(word, "")

#     # Remove special characters and digits
#     transaction = re.sub(r"[^a-z\s]+", "", transaction)

#     # Remove extra spaces
#     transaction = re.sub(r"\s+", " ", transaction).strip()
#     return transaction

def preprocess(text):
    """Reduce a raw transaction string to lowercase keywords.

    Digits are deleted outright, every other character that is neither an
    ASCII letter nor whitespace becomes a space, whitespace runs are
    collapsed, and a fixed list of unwanted words is dropped
    (case-insensitively).

    Returns the remaining words joined by single spaces, in lowercase; the
    result is an empty string when no word survives.
    """
    # Words that are filtered out of the result; compared in uppercase.
    noise_words = ["MPS", "POS", "BIL", "ONL", "BANGALORE", "PVT", "LTD", "INDIA", "LT"]

    # Digits leave no gap behind, so "1MG" becomes "MG" rather than " MG".
    normalized = re.sub(r'\d', '', text)
    # Separators such as "/" or ":" turn into spaces so fields stay apart.
    normalized = re.sub(r'[^a-zA-Z\s]', ' ', normalized)
    normalized = re.sub(r'\s+', ' ', normalized).strip().upper()

    kept = filter(lambda token: token not in noise_words, normalized.split())
    return " ".join(kept).lower()


# Widgets: two free-text inputs (transaction text, comma-separated labels)
# and a label component for the returned scores.
input1 = gr.Textbox(label="Text")
input2 = gr.Textbox(label="Labels")
output = gr.Label(label="Output")

# Example rows for the interface: every sample transaction is paired with the
# same candidate label string.
_example_labels = "Medical, Food, Shopping, Subscription, Travel, Miscellaneous"
_example_transactions = (
    "MPS/TRUFFLES/202303261700/034587/Bangalore",
    "MPS/TACO BELL/202304012247/108300/BANGALORE",
    "POS XXXXXXXXXXXX0001 APOLLO PHARMACY",
    "BIL/ONL/000471093694/1MG Techno/X7ZRUSVLURFQZO",
    "POS XXXXXXXXXXXX1111 DECATHLON SPORTS",
    "POS XXXXXXXXXXXX1111 IKEA INDIA PVT L",
    "POS XXXXXXXXXXXX1111 WWW AMAZON IN",
    "ME DC SI XXXXXXXXXXXX1111 SPOTIFY SI",
    "POS/NETFLIX/1140920002/100623/17:25",
    "POS XXXXXXXXXXXX1110 MAKEMYTRIP INDIA",
    "BIL/ONL/000691178015/IRCTC Serv/XZZBX91LTCY1AZ",
)
transactions_and_tags = [
    [transaction, _example_labels] for transaction in _example_transactions
]

# Wire zero_shot() to the widgets above.
gui = gr.Interface(
    fn=zero_shot,
    inputs=[input1, input2],
    outputs=[output],
    examples=transactions_and_tags,
    title=title,
    description=description,
)

# Start the app as soon as the module is executed.
gui.launch(debug=True)