Rehman1603 committed on
Commit
bc125e7
1 Parent(s): d1503aa

Create encoding.py

Browse files
Files changed (1) hide show
  1. encoding.py +37 -0
encoding.py ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import torch
2
+ from transformers import T5ForConditionalGeneration,T5Tokenizer
3
+
4
+
5
def greedy_decoding(inp_ids, attn_mask, model, tokenizer, *, max_length=256):
    """Generate one question using the model's default (greedy) decoding.

    Args:
        inp_ids: Encoded input, forwarded to ``model.generate`` as
            ``input_ids``.
        attn_mask: Attention mask, forwarded as ``attention_mask``.
        model: Object exposing ``generate(...)``; presumably a
            ``T5ForConditionalGeneration`` given this file's imports --
            confirm against the caller.
        tokenizer: Object exposing ``decode(...)``.
        max_length: Value forwarded as ``max_length`` to ``generate``.
            Defaults to 256, the value that was previously hard-coded.

    Returns:
        The first generated sequence decoded to text, stripped of
        surrounding whitespace, with its first character upper-cased.
        An empty decode yields ``""``.
    """
    greedy_output = model.generate(
        input_ids=inp_ids,
        attention_mask=attn_mask,
        max_length=max_length,
    )
    question = tokenizer.decode(
        greedy_output[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    ).strip()
    # Upper-case only the first character. str.capitalize() would also
    # lower-case the rest of the text, mangling names and acronyms
    # ("NASA" -> "nasa"). Slicing keeps this safe for an empty string.
    return question[:1].upper() + question[1:]
9
+
10
+
11
def beam_search_decoding(
    inp_ids,
    attn_mask,
    model,
    tokenizer,
    *,
    max_length=256,
    num_beams=10,
    num_return_sequences=3,
    no_repeat_ngram_size=2,
):
    """Generate several candidate questions using beam search.

    Args:
        inp_ids: Encoded input, forwarded to ``model.generate`` as
            ``input_ids``.
        attn_mask: Attention mask, forwarded as ``attention_mask``.
        model: Object exposing ``generate(...)``; presumably a
            ``T5ForConditionalGeneration`` given this file's imports --
            confirm against the caller.
        tokenizer: Object exposing ``decode(...)``.
        max_length: Forwarded as ``max_length`` (default 256).
        num_beams: Forwarded as ``num_beams`` (default 10).
        num_return_sequences: Forwarded as ``num_return_sequences``
            (default 3).
        no_repeat_ngram_size: Forwarded as ``no_repeat_ngram_size``
            (default 2).

    Returns:
        A list with one string per sequence returned by ``generate``:
        decoded, stripped of surrounding whitespace, and with the first
        character upper-cased.
    """
    beam_output = model.generate(
        input_ids=inp_ids,
        attention_mask=attn_mask,
        max_length=max_length,
        num_beams=num_beams,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=True,
    )
    questions = []
    for sequence in beam_output:
        text = tokenizer.decode(
            sequence,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        ).strip()
        # Upper-case only the first character. str.capitalize() would also
        # lower-case the remainder, destroying names and acronyms.
        questions.append(text[:1].upper() + text[1:])
    return questions
23
+
24
+
25
def topkp_decoding(
    inp_ids,
    attn_mask,
    model,
    tokenizer,
    *,
    max_length=256,
    top_k=40,
    top_p=0.80,
    num_return_sequences=3,
    no_repeat_ngram_size=2,
):
    """Generate several candidate questions using top-k / top-p sampling.

    Sampling is enabled with ``do_sample=True``, so results are not
    deterministic unless the caller seeds the random generator.

    Args:
        inp_ids: Encoded input, forwarded to ``model.generate`` as
            ``input_ids``.
        attn_mask: Attention mask, forwarded as ``attention_mask``.
        model: Object exposing ``generate(...)``; presumably a
            ``T5ForConditionalGeneration`` given this file's imports --
            confirm against the caller.
        tokenizer: Object exposing ``decode(...)``.
        max_length: Forwarded as ``max_length`` (default 256).
        top_k: Forwarded as ``top_k`` (default 40).
        top_p: Forwarded as ``top_p`` (default 0.80).
        num_return_sequences: Forwarded as ``num_return_sequences``
            (default 3).
        no_repeat_ngram_size: Forwarded as ``no_repeat_ngram_size``
            (default 2).

    Returns:
        A list with one string per sequence returned by ``generate``:
        decoded, stripped of surrounding whitespace, and with the first
        character upper-cased.
    """
    topkp_output = model.generate(
        input_ids=inp_ids,
        attention_mask=attn_mask,
        max_length=max_length,
        do_sample=True,
        top_k=top_k,
        top_p=top_p,
        num_return_sequences=num_return_sequences,
        no_repeat_ngram_size=no_repeat_ngram_size,
        # NOTE(review): early_stopping is documented as a beam-search
        # control; it is kept to preserve the original call. Confirm
        # whether it is needed when sampling without beams.
        early_stopping=True,
    )
    questions = []
    for sequence in topkp_output:
        text = tokenizer.decode(
            sequence,
            skip_special_tokens=True,
            clean_up_tokenization_spaces=True,
        ).strip()
        # Upper-case only the first character. str.capitalize() would also
        # lower-case the remainder, destroying names and acronyms.
        questions.append(text[:1].upper() + text[1:])
    return questions