autosumm / summarizer /summarize.py
mhsvieira's picture
Add current system
e539b70
raw
history blame
No virus
878 Bytes
from functools import lru_cache

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
_MODEL_NAME = 't5-base'


@lru_cache(maxsize=1)
def _load_summarizer():
    """Load the tokenizer and seq2seq model once and reuse them across calls.

    ``from_pretrained`` is expensive, so the pair is cached for the lifetime
    of the process instead of being rebuilt on every ``summarize`` call.

    Returns:
        A ``(tokenizer, model)`` tuple for ``_MODEL_NAME``.
    """
    tokenizer = AutoTokenizer.from_pretrained(_MODEL_NAME)
    model = AutoModelForSeq2SeqLM.from_pretrained(_MODEL_NAME)
    return tokenizer, model


def _tidy_sentences(summary: str) -> str:
    """Upper-case the first letter of each sentence and end with a period."""
    # Only the first character is touched: str.capitalize() would also
    # lower-case the rest of the sentence, mangling proper nouns and acronyms.
    tidied = '. '.join(
        sentence[:1].upper() + sentence[1:]
        for sentence in summary.split('. ')
    )
    # Guard against an empty decode result before inspecting the last char.
    if tidied and not tidied.endswith('.'):
        tidied += '.'
    return tidied


def summarize(text: str) -> str:
    """Generate an abstractive summary of the given text.

    The text is prefixed with ``summarize: ``, truncated to the tokenizer's
    ``model_max_length``, and passed to the model's beam-search generation.
    The decoded output is then tidied so that every sentence starts with an
    upper-case letter and the summary ends with a period.

    Args:
        text: The text to summarize.

    Returns:
        The summary, or an empty string when ``text`` is empty or contains
        only whitespace (there is nothing to summarize).
    """
    # Skip the model entirely for blank input; it would only produce a
    # meaningless summary from the bare task prefix.
    if not text or not text.strip():
        return ''

    tokenizer, model = _load_summarizer()

    input_tokens = tokenizer.encode(
        f'summarize: {text}',
        return_tensors='pt',
        max_length=tokenizer.model_max_length,
        truncation=True,
    )
    summary_ids = model.generate(
        input_tokens,
        min_length=80,
        max_length=150,
        length_penalty=15,
        num_beams=2,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
    return _tidy_sentences(summary)