Spaces:
Runtime error
Runtime error
File size: 2,084 Bytes
cdc5783 7019e7f 090f2d4 da7746d cdc5783 7019e7f cdc5783 a680719 cdc5783 4c74009 2c41d85 858ef45 1f6b7aa 2a78aa3 cdc5783 a680719 cdc5783 a680719 cdc5783 30e268a 60a3519 cdc5783 e2ec8e0 cdc5783 e2ec8e0 cdc5783 e2ec8e0 cdc5783 858ef45 cdc5783 3784e1c 7019e7f e2ec8e0 cdc5783 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import logging

from langdetect import detect
from transformers import pipeline

from utils.tag_utils import filter_tags
# Version stamp written to the "ver" field of every record returned by summarize().
AiSummaryVersion = 3
# Minimum classifier score a label must reach to be kept by get_tags().
MinTagScore = 0.7
# transformers pipelines, constructed once at module import time and shared by
# the helper functions below.
# Used by get_summarization().
summarization_pipeline = pipeline("summarization", model="Falconsai/text_summarization")
# Used by get_en_translation() for text that is not detected as English.
en_translation_pipe = pipeline("translation", model="Helsinki-NLP/opus-mt-mul-en")
# Two independent news-category classifiers; get_tags() combines their labels.
tag_gen_pipe_1 = pipeline("text-classification", model="dima806/news-category-classifier-distilbert")
tag_gen_pipe_2 = pipeline("text-classification", model="elozano/bert-base-cased-news-category")
def summarize(id: str, text: str):
    """Build the AI summary record for one document.

    Text that is missing or shorter than 10 characters yields a record that
    carries only the summary version. Otherwise, text longer than 3000
    characters is condensed with ``get_summarization`` (shorter text is used
    verbatim), the result is passed through ``get_en_translation``, and tags
    are produced by ``get_tags`` followed by ``filter_tags``.

    Args:
        id: Identifier echoed back in the record and forwarded to ``get_tags``.
        text: Raw document text; may be ``None``.

    Returns:
        ``{"ver": ...}`` for unusable input; otherwise a dict with the keys
        ``"id"``, ``"ver"``, ``"summary"`` and ``"tags"``, where the tags are
        de-duplicated and sorted.
    """
    # Guard clause: nothing worth summarizing.
    if text is None or len(text) < 10:
        return {"ver": AiSummaryVersion}

    # Only long documents go through the summarization model.
    if len(text) > 3000:
        summary = get_summarization(text)
    else:
        summary = text

    # The English rendering is used for tagging only; the stored summary
    # stays in its original language.
    english = get_en_translation(summary)
    unique_tags = set(filter_tags(get_tags(english, id)))

    return {
        "id": id,
        "ver": AiSummaryVersion,
        "summary": summary,
        "tags": sorted(unique_tags),
    }
def get_summarization(text: str):
    """Summarize ``text`` with the module-level summarization pipeline.

    This is best-effort: any error raised while running the pipeline is
    logged and reported to the caller as ``None`` instead of propagating.

    Args:
        text: The text to condense.

    Returns:
        The generated summary string, or ``None`` if summarization failed.
    """
    try:
        # max_length / min_length bound the generated summary and are counted
        # in tokens, not words; do_sample=False disables sampling.
        result = summarization_pipeline(text, max_length=500, min_length=100, do_sample=False)
        # Accept both a list of result dicts and a single result dict.
        first = result[0] if isinstance(result, list) else result
        return first['summary_text']
    except Exception:
        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        logging.getLogger(__name__).exception("Summarization failed")
        return None
def get_en_translation(text: str):
    """Return an English version of ``text``.

    Text that ``is_english`` already recognises as English is returned
    unchanged; anything else is sent through the module-level translation
    pipeline. This is best-effort: a failure is logged and reported as
    ``None`` instead of propagating.

    Args:
        text: The text to translate; ``None`` is passed straight through.

    Returns:
        The English text, or ``None`` when ``text`` is ``None`` or the
        translation failed.
    """
    if text is None:
        return None
    try:
        if is_english(text):
            return text
        result = en_translation_pipe(text)
        # Accept both a list of result dicts and a single result dict.
        first = result[0] if isinstance(result, list) else result
        return first['translation_text']
    except Exception:
        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        logging.getLogger(__name__).exception("Translation to English failed")
        return None
def is_english(text):
    """Report whether ``text`` is detected as English.

    Missing, non-string, empty or whitespace-only input is reported as not
    English without invoking the detector. Any error raised by the detector
    is likewise treated as "not English".

    Args:
        text: The text to inspect.

    Returns:
        ``True`` if the detected language code is ``'en'``, else ``False``.
    """
    # Nothing to detect on; answer directly instead of relying on the
    # detector raising for such input.
    if not isinstance(text, str) or not text.strip():
        return False
    try:
        return detect(text) == 'en'
    except Exception:
        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        return False
def get_tags(text: str, id: str):
    """Collect category labels for ``text`` from both tag classifiers.

    Each classifier is run on ``text`` and every predicted label whose score
    is at least ``MinTagScore`` is kept. Labels from the first classifier
    come before those from the second; duplicates are not removed here.
    This is best-effort and all-or-nothing: if either classifier fails, the
    error is logged and an empty list is returned.

    Args:
        text: The text to classify; ``None`` yields no tags.
        id: Identifier of the document, used only to give log messages
            context.

    Returns:
        A list of label strings, possibly empty.
    """
    if text is None:
        return []
    try:
        tags = []
        for classifier in (tag_gen_pipe_1, tag_gen_pipe_2):
            # truncation=True asks the tokenizer to clip the input to the
            # model's maximum length, so over-long text does not fail the
            # whole call and silently produce no tags.
            predictions = classifier(text, truncation=True)
            tags.extend(
                prediction['label']
                for prediction in predictions
                if prediction['score'] >= MinTagScore
            )
        return tags
    except Exception:
        # `except Exception` instead of a bare `except` so that
        # KeyboardInterrupt and SystemExit are not swallowed.
        logging.getLogger(__name__).exception("Tag generation failed for id=%s", id)
        return []
|