Utente committed on
Commit
628a202
·
1 Parent(s): 84bd6e1

README modified

Browse files
Files changed (1) hide show
  1. README.md +7 -14
README.md CHANGED
@@ -40,21 +40,14 @@ tokenizer = T5Tokenizer.from_pretrained(raw_model)
40
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
41
  max_size = 10000
42
 
43
- def get_paraphrased_sentences(model, tokenizer, sentence, num_return_sequences=5, num_beams=5):
44
- # tokenize the text to be form of a list of token IDs
45
- inputs = tokenizer([sentence], truncation=True, padding="longest", return_tensors="pt").to(model.device)
46
- # generate the paraphrased sentences
47
- outputs = model.generate(
48
- **inputs,
49
- num_beams=num_beams,
50
- num_return_sequences=num_return_sequences,
51
- max_length=max_size
52
- )
53
- # decode the generated sentences using the tokenizer to get them back to text
54
- return tokenizer.batch_decode(outputs, skip_special_tokens=True)
55
 
56
- # sentence = "Vorrei chiedervi la procedura per recuperare la chiave di accesso al mio profilo personale. L'ho persa e vorrei recuperarla."
57
- # get_paraphrased_sentences(model, tokenizer, sentence, num_beams=100, num_return_sequences=5)
58
 
59
  ```
60
 
 
40
  device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
41
  max_size = 10000
42
 
43
+ def paraphrase(text, beams=100, grams=10, num_return_sequences=5):
44
+ x = tokenizer(text, return_tensors='pt', padding=True).to(model.device)
45
+ max_size = int(x.input_ids.shape[1] * 1.5 + 10)
46
+ out = model.generate(**x, encoder_no_repeat_ngram_size=grams, num_beams=beams, num_return_sequences=num_return_sequences, max_length=max_size)
47
+ return tokenizer.batch_decode(out, skip_special_tokens=True)
 
 
 
 
 
 
 
48
 
49
+ sentence = "Due amici si incontrano al bar per discutere del modo migliore di generare parafrasi."
50
+ print(paraphrase(sentence))
51
 
52
  ```
53