abhisheky127 committed on
Commit
ed3015b
1 Parent(s): ee3f8bf

adding keyword crawling from google api in the flow

Browse files
Files changed (1) hide show
  1. app.py +30 -25
app.py CHANGED
@@ -1,6 +1,31 @@
1
  import gradio as gr
2
  import re
3
  from transformers import pipeline
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
  title = "Fold: Contextual Tag Recommendation System"
6
  description = "powered by bart-large-mnli, made by @abhisheky127"
@@ -8,42 +33,22 @@ description = "powered by bart-large-mnli, made by @abhisheky127"
8
  classifier = pipeline("zero-shot-classification",
9
  model="facebook/bart-large-mnli")
10
 
 
 
 
 
11
  #define a function to process your input and output
12
  def zero_shot(doc, candidates):
13
  given_labels = candidates.split(",")
14
  given_labels = list(map(str.strip, given_labels))
15
  doc = preprocess(doc)
 
16
  print(doc)
17
  dictionary = classifier(doc, given_labels)
18
  labels = dictionary['labels']
19
  scores = dictionary['scores']
20
  return dict(zip(labels, scores))
21
 
22
- #define a function to preprocess transaction query
23
- # def preprocess(transaction):
24
- # pattern = r'([A-Za-z0-9\s]+)(?:/| |$)'
25
- # match = re.search(pattern, transaction)
26
- # if match:
27
- # return match.group(1).strip()
28
- # return None
29
-
30
- # def preprocess(transaction):
31
- # remove_words = ["pos", "mps", "bil", "onl"]
32
-
33
- # # Convert to lowercase
34
- # transaction = transaction.lower()
35
-
36
- # # Remove unwanted words
37
- # for word in remove_words:
38
- # transaction = transaction.replace(word, "")
39
-
40
- # # Remove special characters and digits
41
- # transaction = re.sub(r"[^a-z\s]+", "", transaction)
42
-
43
- # # Remove extra spaces
44
- # transaction = re.sub(r"\s+", " ", transaction).strip()
45
- # return transaction
46
-
47
  def preprocess(text):
48
  # Remove digits
49
  cleaned_text = re.sub(r'\d', '', text)
 
1
  import gradio as gr
2
  import re
3
  from transformers import pipeline
4
+ from googlesearch import search
5
+ import requests
6
+ from bs4 import BeautifulSoup
7
+
8
def get_google_description(keyword):
    """Expand *keyword* into the meta description of its top search result.

    Args:
        keyword: Query text to look up.

    Returns:
        The first non-empty description obtained from the search results, or
        *keyword* unchanged when it is blank, when the lookup fails with a
        network error, or when no result yields a description.
    """
    # A blank query has nothing to look up; skip the network round-trip.
    if not keyword or not keyword.strip():
        return keyword

    try:
        # Iterate inside the try block: the search may perform its requests
        # lazily while results are being consumed.
        for result in search(keyword, num_results=1, lang='en'):
            description = get_description_from_url(result)
            if description:
                return description
    except requests.RequestException:
        # Enrichment is best-effort; fall back to the raw keyword.
        return keyword

    return keyword
18
+
19
def get_description_from_url(url):
    """Fetch *url* and return its ``<meta name="description">`` content.

    Args:
        url: Address of the page to download.

    Returns:
        The ``content`` attribute of the description meta tag, or ``None``
        when the request fails, the server answers with an HTTP error status,
        or the page has no such tag.
    """
    try:
        # Bound the wait so an unresponsive site cannot hang the caller.
        response = requests.get(url, timeout=10)
        # Error pages do not describe the searched keyword; treat as a miss.
        response.raise_for_status()
    except requests.RequestException:
        return None

    soup = BeautifulSoup(response.text, 'html.parser')
    description_tag = soup.find('meta', {'name': 'description'})
    if description_tag is None:
        return None

    return description_tag.get('content')
28
+
29
 
30
  title = "Fold: Contextual Tag Recommendation System"
31
  description = "powered by bart-large-mnli, made by @abhisheky127"
 
33
  classifier = pipeline("zero-shot-classification",
34
  model="facebook/bart-large-mnli")
35
 
36
+
37
+
38
+
39
+
40
  #define a function to process your input and output
41
  def zero_shot(doc, candidates):
42
  given_labels = candidates.split(",")
43
  given_labels = list(map(str.strip, given_labels))
44
  doc = preprocess(doc)
45
+ doc = get_google_description(doc)
46
  print(doc)
47
  dictionary = classifier(doc, given_labels)
48
  labels = dictionary['labels']
49
  scores = dictionary['scores']
50
  return dict(zip(labels, scores))
51
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
  def preprocess(text):
53
  # Remove digits
54
  cleaned_text = re.sub(r'\d', '', text)