File size: 763 Bytes
a9e7556
e539b70
a9e7556
78a71e8
e539b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
from utils.timing import Timer

@Timer.time_it('abstração', 'abstraction')
def summarize(text: str, model, tokenizer) -> str:
    """
    Generate an abstractive summary of the given text.

    Parameters
    ----------
    text : str
        The source text to summarize.
    model :
        A seq2seq model exposing ``generate`` (the ``summarize:`` task
        prefix suggests a T5-style model — confirm against the caller).
    tokenizer :
        The tokenizer paired with ``model``; must expose ``encode``,
        ``decode`` and ``model_max_length``.

    Returns
    -------
    str
        The decoded summary, with each sentence capitalized and a
        terminating period guaranteed.
    """
    # Prepend the task tag and clip the input to the model's context
    # window so over-long texts don't crash generation.
    input_tokens = tokenizer.encode(
        f'summarize: {text}',
        return_tensors='pt',
        max_length=tokenizer.model_max_length,
        truncation=True
    )

    # Beam search with a strong length penalty to push the summary
    # toward the 80-150 token window.
    summary_ids = model.generate(
        input_tokens,
        min_length=80,
        max_length=150,
        length_penalty=15,
        num_beams=2
    )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    # Capitalize each sentence. NOTE: str.capitalize() also lowercases
    # the remainder of the sentence, so acronyms/proper nouns lose
    # their original casing — intentional per the original code.
    summary = '. '.join(phrase.capitalize() for phrase in summary.split('. '))

    # Bug fix: the original checked `summary[-1]`, which raises
    # IndexError when the decoded summary is empty; endswith() is safe
    # for the empty string (it returns False, so we still append '.').
    if not summary.endswith('.'):
        summary += '.'

    return summary