dsilin committed on
Commit
40bc550
1 Parent(s): ee3e674

fixed readme

Browse files
Files changed (1) hide show
  1. README.md +10 -5
README.md CHANGED
@@ -1,9 +1,17 @@
 
 
 
 
 
 
 
 
1
  This model can be used to more accurately detokenize text produced by the Moses tokenizer (it does a better job with certain lossy cases, such as quotes)
2
 
3
 
4
  batched usage:
5
 
6
- ```
7
 
8
  sentences = [
9
  "They 're a young team . they have great players and amazing freshmen coming in , so think they 'll grow into themselves next year ,",
@@ -23,10 +31,7 @@ def sentences_to_input_tokens(sentences):
23
  max_length = 0
24
  sents_tokens = []
25
  iids = tokenizer(sentences)
26
- for sent_tokens in iids['input_ids']:
27
- # sent_tokens = tokenizer.build_inputs_with_special_tokens(
28
- # tokenizer.convert_tokens_to_ids(tokenizer.tokenize(sentence)))
29
-
30
  sents_tokens.append(sent_tokens)
31
 
32
  if len(sent_tokens) > max_length:
1
+ ---
2
+ language: english
3
+ widget:
4
+ - text: "They 're a young team . they have great players and amazing freshmen coming in , so think they 'll grow into themselves next year ,"
5
+ - text: "\" We 'll talk go by now ; \" says Shucksmith ;"
6
+ - text: "\" Warren Gatland is a professional person and it wasn 't a case of 's I 'll phone my mate Rob up to if he wants a coaching job ' , he would done a fair amount of homework about , \" Howley air said ."
7
+ ---
8
+
9
  This model can be used to more accurately detokenize text produced by the Moses tokenizer (it does a better job with certain lossy cases, such as quotes)
10
 
11
 
12
  batched usage:
13
 
14
+ ```python
15
 
16
  sentences = [
17
  "They 're a young team . they have great players and amazing freshmen coming in , so think they 'll grow into themselves next year ,",
31
  max_length = 0
32
  sents_tokens = []
33
  iids = tokenizer(sentences)
34
+ for sent_tokens in iids['input_ids']:
 
 
 
35
  sents_tokens.append(sent_tokens)
36
 
37
  if len(sent_tokens) > max_length: