File size: 878 Bytes
e539b70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
from functools import lru_cache

from transformers import AutoTokenizer, AutoModelForSeq2SeqLM

@lru_cache(maxsize=1)
def _load_summarizer():
    """Load the 't5-base' model and tokenizer once and reuse them across calls."""
    model = AutoModelForSeq2SeqLM.from_pretrained('t5-base')
    tokenizer = AutoTokenizer.from_pretrained('t5-base')
    return model, tokenizer


def _polish_summary(summary: str) -> str:
    """Upper-case the first letter of each sentence and ensure end punctuation."""
    summary = summary.strip()
    if not summary:
        return summary

    # Only the first character is touched: str.capitalize() would also
    # lower-case the rest of the sentence and mangle names and acronyms.
    summary = '. '.join(
        phrase[:1].upper() + phrase[1:] for phrase in summary.split('. ')
    )
    if not summary.endswith(('.', '!', '?')):
        summary += '.'
    return summary


def summarize(text: str) -> str:
    """
    Generate an abstractive summary of the given text.

    The text is prefixed with 'summarize: ', tokenized (truncated to the
    tokenizer's maximum input length) and passed to the 't5-base'
    sequence-to-sequence model, which generates the summary with beam search.

    Args:
        text: The text to summarize.

    Returns:
        The generated summary with each sentence starting with an upper-case
        letter and ending in terminal punctuation, or an empty string when
        ``text`` is empty or contains only whitespace.
    """
    # Nothing to summarize: skip the (expensive) model load and generation.
    if not text or not text.strip():
        return ''

    # Model for abstraction (cached after the first call)
    model, tokenizer = _load_summarizer()

    input_tokens = tokenizer.encode(
        f'summarize: {text}',
        return_tensors='pt',
        max_length=tokenizer.model_max_length,
        truncation=True,
    )

    summary_ids = model.generate(
        input_tokens,
        min_length=80,
        max_length=150,
        length_penalty=15,
        num_beams=2,
    )

    summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)

    return _polish_summary(summary)