# autosumm/summarizer/summarize.py
# Last commit: a9e7556 "Add timer" (mhsvieira)
from utils.timing import Timer
@Timer.time_it('abstração', 'abstraction')
def summarize(text: str, model, tokenizer) -> str:
    """
    Generate an abstractive summary of the given text.

    The text is prefixed with ``summarize: ``, encoded by ``tokenizer``
    (truncated to ``tokenizer.model_max_length``), and passed to
    ``model.generate`` using beam search. The first returned sequence is
    decoded and lightly normalized.

    Args:
        text: The text to summarize.
        model: Object exposing ``generate(input_ids, **kwargs)``.
            NOTE(review): presumably a Hugging Face seq2seq model such as
            T5, given the ``summarize:`` prefix -- confirm with the caller.
        tokenizer: Object exposing ``encode``, ``decode`` and
            ``model_max_length``.

    Returns:
        The summary with the first letter of every sentence upper-cased and
        a guaranteed trailing period. An empty string is returned unchanged
        when the model produces no text.
    """
    input_tokens = tokenizer.encode(
        f'summarize: {text}',
        return_tensors='pt',
        max_length=tokenizer.model_max_length,
        truncation=True,
    )
    summary_ids = model.generate(
        input_tokens,
        min_length=80,
        max_length=150,
        length_penalty=15,
        num_beams=2,
    )
    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Nothing to normalize; also avoids indexing into an empty string.
    if not summary:
        return summary

    # Upper-case only the first character of each sentence. str.capitalize()
    # would also lower-case the rest, mangling acronyms and proper nouns.
    summary = '. '.join(
        phrase[:1].upper() + phrase[1:] for phrase in summary.split('. ')
    )
    if not summary.endswith('.'):
        summary += '.'
    return summary