import torch
from transformers import T5ForConditionalGeneration, T5Tokenizer

def greedy_decoding(inp_ids, attn_mask, model, tokenizer):
    # Greedy decoding: at each step keep only the single most probable token.
    greedy_output = model.generate(input_ids=inp_ids, attention_mask=attn_mask, max_length=256)
    question = tokenizer.decode(greedy_output[0], skip_special_tokens=True, clean_up_tokenization_spaces=True)
    return question.strip().capitalize()

def beam_search_decoding(inp_ids, attn_mask, model, tokenizer):
    # Beam search: track the 10 highest-scoring partial sequences and return
    # the 3 best finished ones. no_repeat_ngram_size=2 blocks repeated bigrams,
    # and early_stopping ends a beam as soon as it emits EOS.
    beam_output = model.generate(
        input_ids=inp_ids,
        attention_mask=attn_mask,
        max_length=256,
        num_beams=10,
        num_return_sequences=3,
        no_repeat_ngram_size=2,
        early_stopping=True,
    )
    questions = [
        tokenizer.decode(out, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for out in beam_output
    ]
    return [question.strip().capitalize() for question in questions]

def topkp_decoding(inp_ids, attn_mask, model, tokenizer):
    # Top-k / top-p (nucleus) sampling: at each step, sample from the 40 most
    # probable tokens, further restricted to the smallest set whose cumulative
    # probability reaches 0.80. early_stopping is omitted here because it only
    # applies to beam search and is ignored when do_sample=True with one beam.
    topkp_output = model.generate(
        input_ids=inp_ids,
        attention_mask=attn_mask,
        max_length=256,
        do_sample=True,
        top_k=40,
        top_p=0.80,
        num_return_sequences=3,
        no_repeat_ngram_size=2,
    )
    questions = [
        tokenizer.decode(out, skip_special_tokens=True, clean_up_tokenization_spaces=True)
        for out in topkp_output
    ]
    return [question.strip().capitalize() for question in questions]
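

# Minimal usage sketch, not part of the original file: it assumes any
# seq2seq T5 checkpoint. "t5-small" is a placeholder for whatever
# question-generation model this Space actually loads, and the
# "context: ... answer: ..." prompt format is likewise an assumption.
if __name__ == "__main__":
    model = T5ForConditionalGeneration.from_pretrained("t5-small")
    tokenizer = T5Tokenizer.from_pretrained("t5-small")
    model.eval()

    prompt = "context: The Eiffel Tower was completed in 1889 in Paris. answer: 1889"
    encoding = tokenizer(prompt, return_tensors="pt")
    inp_ids, attn_mask = encoding["input_ids"], encoding["attention_mask"]

    with torch.no_grad():
        # One deterministic question, three beam-searched, three sampled.
        print(greedy_decoding(inp_ids, attn_mask, model, tokenizer))
        for question in beam_search_decoding(inp_ids, attn_mask, model, tokenizer):
            print(question)
        for question in topkp_decoding(inp_ids, attn_mask, model, tokenizer):
            print(question)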