quyip committed on
Commit
1f6b7aa
1 Parent(s): 3784e1c
Files changed (1) hide show
  1. utils/summary_utils.py +3 -25
utils/summary_utils.py CHANGED
@@ -9,7 +9,6 @@ AiSummaryVersion = 2
9
  MinTagScore = 0.7
10
  summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
11
  en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
12
- classification_pipe = pipeline("text-classification", model="Yueh-Huan/news-category-classification-distilbert")
13
  tag_gen_pipe_1 = pipeline("text-classification", model="yiyanghkust/finbert-esg-9-categories")
14
  tag_gen_pipe_2 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
15
  tag_gen_pipe_3 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
@@ -22,9 +21,8 @@ def summarize(id: str, text: str):
22
  }
23
  summary = get_summarization(text) if len(text) > 100 else text
24
  translated = get_en_translation(summary)
25
- tags1 = get_classification(translated)
26
- tags2 = get_tags(translated)
27
- tags = filter_tags(tags1 + tags2)
28
  tags = sorted(list(set(tags)))
29
 
30
  value = {
@@ -68,29 +66,9 @@ def get_tags(text: str):
68
  if text is None:
69
  return []
70
  try:
71
- print(tag_gen_pipe_1(text))
72
- print(tag_gen_pipe_2(text))
73
- print(tag_gen_pipe_3(text))
74
  tags1 = [tag['label'] for tag in tag_gen_pipe_1(text) if tag['score'] >= MinTagScore]
75
  tags2 = [tag['label'] for tag in tag_gen_pipe_2(text) if tag['score'] >= MinTagScore]
76
  tags3 = [tag['label'] for tag in tag_gen_pipe_3(text) if tag['score'] >= MinTagScore]
77
- print(tags1)
78
- print(tags2)
79
- print(tags3)
80
- # print(tags1, tags2, tags3)
81
- return []
82
- except:
83
- return []
84
-
85
-
86
- def get_classification(text: str):
87
- if text is None:
88
- return []
89
- try:
90
- result = classification_pipe(text)
91
- if isinstance(result, list):
92
- return [tag['label'].strip() for tag in result if tag['score'] > 0.75]
93
- else:
94
- return [result['label'].strip()] if result['score'] > 0.75 else []
95
  except:
96
  return []
 
9
  MinTagScore = 0.7
10
  summarization_pipeline = pipeline("summarization", model="csebuetnlp/mT5_multilingual_XLSum")
11
  en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
 
12
  tag_gen_pipe_1 = pipeline("text-classification", model="yiyanghkust/finbert-esg-9-categories")
13
  tag_gen_pipe_2 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
14
  tag_gen_pipe_3 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
 
21
  }
22
  summary = get_summarization(text) if len(text) > 100 else text
23
  translated = get_en_translation(summary)
24
+ tags = get_tags(translated)
25
+ tags = filter_tags(tags)
 
26
  tags = sorted(list(set(tags)))
27
 
28
  value = {
 
66
  if text is None:
67
  return []
68
  try:
 
 
 
69
  tags1 = [tag['label'] for tag in tag_gen_pipe_1(text) if tag['score'] >= MinTagScore]
70
  tags2 = [tag['label'] for tag in tag_gen_pipe_2(text) if tag['score'] >= MinTagScore]
71
  tags3 = [tag['label'] for tag in tag_gen_pipe_3(text) if tag['score'] >= MinTagScore]
72
+ return tags1 + tags2 + tags3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  except:
74
  return []