Utente
committed on
Commit
·
628a202
1
Parent(s):
84bd6e1
README modified
Browse files
README.md
CHANGED
@@ -40,21 +40,14 @@ tokenizer = T5Tokenizer.from_pretrained(raw_model)
|
|
40 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
41 |
max_size = 10000
|
42 |
|
43 |
-
def
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
**inputs,
|
49 |
-
num_beams=num_beams,
|
50 |
-
num_return_sequences=num_return_sequences,
|
51 |
-
max_length=max_size
|
52 |
-
)
|
53 |
-
# decode the generated sentences using the tokenizer to get them back to text
|
54 |
-
return tokenizer.batch_decode(outputs, skip_special_tokens=True)
|
55 |
|
56 |
-
|
57 |
-
|
58 |
|
59 |
```
|
60 |
|
|
|
40 |
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
41 |
max_size = 10000
|
42 |
|
43 |
+
def paraphrase(text, beams=100, grams=10, num_return_sequences=5):
|
44 |
+
x = tokenizer(text, return_tensors='pt', padding=True).to(model.device)
|
45 |
+
max_size = int(x.input_ids.shape[1] * 1.5 + 10)
|
46 |
+
out = model.generate(**x, encoder_no_repeat_ngram_size=grams, num_beams=beams, num_return_sequences=num_return_sequences, max_length=max_size)
|
47 |
+
return tokenizer.batch_decode(out, skip_special_tokens=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
+
sentence = "Due amici si incontrano al bar per discutere del modo migliore di generare parafrasi."
|
50 |
+
print(paraphrase(sentence))
|
51 |
|
52 |
```
|
53 |
|