File size: 3,622 Bytes
1308769
059b923
1308769
0d4640f
ed3015b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0a9afa8
ed3015b
 
 
 
 
 
 
 
1308769
 
 
 
 
 
 
ed3015b
 
 
 
1308769
 
80aea0d
1be780a
ae9e0c1
01166ba
3cedfb4
1308769
 
 
 
53a8c9e
 
 
 
 
 
 
620ca5b
53a8c9e
 
 
 
 
 
6d76bd4
53a8c9e
 
 
 
cd66250
39e6a9c
53a8c9e
 
1308769
 
 
 
 
 
 
 
 
 
 
 
 
 
45f0ebb
 
 
 
 
 
 
 
 
 
1308769
 
 
 
 
 
80aea0d
1308769
39e6a9c
b4a31bc
1308769
 
13335cc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import gradio as gr
import re
from transformers import pipeline
# from googlesearch import search
import requests
from bs4 import BeautifulSoup

def get_google_description(keyword):
    """Return a web-derived description for ``keyword``, or ``keyword`` itself.

    Runs a web search for ``keyword`` and returns the first non-empty
    description that ``get_description_from_url`` extracts from the result
    URLs.

    Args:
        keyword: Search query text.

    Returns:
        The first truthy description found. ``keyword`` is returned
        unchanged when no result yields a description or when the optional
        ``googlesearch`` package is not installed.
    """
    # The module-level ``googlesearch`` import is commented out, so a bare
    # ``search`` reference raised NameError here. Import it lazily and treat
    # a missing package the same as "no description available".
    try:
        from googlesearch import search
    except ImportError:
        return keyword

    for result in search(keyword, num_results=1, lang='en'):
        description = get_description_from_url(result)
        if description:
            return description

    return keyword

def get_description_from_url(url):
    """Fetch ``url`` and return its ``<meta name="description">`` content.

    Args:
        url: Address of the page to download.

    Returns:
        The ``content`` attribute of the first ``<meta name="description">``
        tag. ``None`` when the request fails or times out, the server
        answers with an HTTP error status, the page has no such tag, or the
        tag has no ``content`` attribute.
    """
    try:
        response = requests.get(url, timeout=10)
        # 4xx/5xx pages do not describe the requested resource; treat them
        # as a failed lookup instead of parsing the error page.
        response.raise_for_status()
    except requests.RequestException:
        # Best-effort lookup: connection errors, timeouts and HTTP errors are
        # reported as "no description" rather than aborting the caller.
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    description_tag = soup.find('meta', {'name': 'description'})
    if description_tag is None:
        return None

    return description_tag.get('content')


# Heading and attribution text handed to the Gradio interface.
title = "Fold: Contextual Tag Recommendation System"
description = "powered by bart-large-mnli, made by @abhisheky127"

# Zero-shot classification pipeline, created once at module import and
# reused for every request.
classifier = pipeline(
    "zero-shot-classification",
    model="facebook/bart-large-mnli",
)





def zero_shot(doc, candidates):
    """Score ``doc`` against a comma-separated list of candidate labels.

    Args:
        doc: Raw text to classify; it is cleaned with ``preprocess`` before
            being passed to the classifier.
        candidates: Candidate labels as a single comma-separated string,
            e.g. ``"Medical, Food, Travel"``. ``None`` is treated as empty.

    Returns:
        A dict mapping each label to the score reported by the classifier,
        or an empty dict when ``candidates`` contains no usable label.
    """
    # Strip surrounding whitespace, then drop blank entries (produced by an
    # empty string or by trailing/doubled commas) and duplicates while
    # keeping first-seen order. Blank labels would otherwise be scored as if
    # they were real classes.
    stripped = map(str.strip, (candidates or "").split(","))
    given_labels = list(dict.fromkeys(label for label in stripped if label))
    if not given_labels:
        # Nothing to score against; skip the model call entirely.
        return {}

    doc = preprocess(doc)
    # Echo the cleaned text to stdout for debugging.
    print(doc)
    dictionary = classifier(doc, given_labels)
    return dict(zip(dictionary['labels'], dictionary['scores']))

def preprocess(text):
    """Reduce a raw transaction string to lowercase keywords.

    Digits are deleted, every remaining character that is neither an ASCII
    letter nor whitespace is replaced by a space, whitespace runs are
    collapsed, and a fixed set of noise tokens is dropped.

    Args:
        text: Raw input string.

    Returns:
        The surviving words, lowercased and joined by single spaces.
    """
    # Tokens carrying no meaning for tagging; stored uppercase.
    noise_tokens = {
        "MPS", "POS", "BIL", "ONL", "BANGALORE",
        "PVT", "LTD", "INDIA", "LT", "XXXXXXXXXXXX",
    }

    # Digits are deleted outright (no space inserted), so a masked number
    # such as "XXXXXXXXXXXX0001" shrinks to the noise token "XXXXXXXXXXXX".
    without_digits = re.sub(r'\d', '', text)
    letters_only = re.sub(r'[^a-zA-Z\s]', ' ', without_digits)
    squeezed = re.sub(r'\s+', ' ', letters_only).strip()

    # Compare in uppercase to match the noise tokens, then lowercase the result.
    kept = (token for token in squeezed.upper().split() if token not in noise_tokens)

    # NOTE: optional enrichment via get_google_description is disabled here.
    return " ".join(kept).lower()


# UI components used by the interface.

# Free-text field for the text to classify.
input1 = gr.Textbox(
    label="Text",
)

# Free-text field for the candidate labels.
input2 = gr.Textbox(
    label="Labels",
)

# Label component that displays the returned label/score mapping.
output = gr.Label(
    label="Output",
)

# Example rows for the interface: each row is [transaction text, labels],
# and every row shares the same candidate-label string.
_example_labels = "Medical, Food, Shopping, Subscription, Travel"
transactions_and_tags = [
    [narration, _example_labels]
    for narration in (
        "MPS/TRUFFLES/202303261700/034587/Bangalore",
        "MPS/TACO BELL/202304012247/108300/BANGALORE",
        "POS XXXXXXXXXXXX0001 APOLLO PHARMACY",
        "BIL/ONL/000471093694/1MG Techno/X7ZRUSVLURFQZO",
        "POS XXXXXXXXXXXX1111 DECATHLON SPORTS",
        "POS XXXXXXXXXXXX1111 WWW AMAZON IN",
        "ME DC SI XXXXXXXXXXXX1111 SPOTIFY SI",
        "POS/NETFLIX/1140920002/100623/17:25",
        "POS XXXXXXXXXXXX1110 MAKEMYTRIP INDIA",
        "BIL/ONL/000691178015/IRCTC Serv/XZZBX91LTCY1AZ",
    )
]

# Assemble the Gradio interface: two text inputs feed zero_shot and its
# result is shown in the label output; the example rows are attached too.
gui = gr.Interface(
    fn=zero_shot,
    inputs=[input1, input2],
    outputs=[output],
    examples=transactions_and_tags,
    title=title,
    description=description,
)

# Launch the interface with Gradio's debug mode enabled.
gui.launch(debug=True)