fixed readme
Browse files
README.md
CHANGED
@@ -1,9 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
This model can be used to more accurately detokenize text produced by the Moses tokenizer (it does a better job with certain lossy cases, such as quotes).
|
2 |
|
3 |
|
4 |
batched usage:
|
5 |
|
6 |
-
```
|
7 |
|
8 |
sentences = [
|
9 |
"They 're a young team . they have great players and amazing freshmen coming in , so think they 'll grow into themselves next year ,",
|
@@ -23,10 +31,7 @@ def sentences_to_input_tokens(sentences):
|
|
23 |
max_length = 0
|
24 |
sents_tokens = []
|
25 |
iids = tokenizer(sentences)
|
26 |
-
for sent_tokens in iids['input_ids']:
|
27 |
-
# sent_tokens = tokenizer.build_inputs_with_special_tokens(
|
28 |
-
# tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentence)))
|
29 |
-
|
30 |
sents_tokens.append(sent_tokens)
|
31 |
|
32 |
if len(sent_tokens) > max_length:
|
|
|
1 |
+
---
|
2 |
+
language: en
|
3 |
+
widget:
|
4 |
+
- text: "They 're a young team . they have great players and amazing freshmen coming in , so think they 'll grow into themselves next year ,"
|
5 |
+
- text: "\" We 'll talk go by now ; \" says Shucksmith ;"
|
6 |
+
- text: "\" Warren Gatland is a professional person and it wasn 't a case of 's I 'll phone my mate Rob up to if he wants a coaching job ' , he would done a fair amount of homework about , \" Howley air said ."
|
7 |
+
---
|
8 |
+
|
9 |
This model can be used to more accurately detokenize text produced by the Moses tokenizer (it does a better job with certain lossy cases, such as quotes).
|
10 |
|
11 |
|
12 |
batched usage:
|
13 |
|
14 |
+
```python
|
15 |
|
16 |
sentences = [
|
17 |
"They 're a young team . they have great players and amazing freshmen coming in , so think they 'll grow into themselves next year ,",
|
|
|
31 |
max_length = 0
|
32 |
sents_tokens = []
|
33 |
iids = tokenizer(sentences)
|
34 |
+
for sent_tokens in iids['input_ids']:
|
|
|
|
|
|
|
35 |
sents_tokens.append(sent_tokens)
|
36 |
|
37 |
if len(sent_tokens) > max_length:
|