arxyzan committed on
Commit
7149eca
1 Parent(s): 4eab14c

Hezar: Upload tokenizer_config.yaml

Browse files
Files changed (1) hide show
  1. preprocessor/tokenizer_config.yaml +6 -14
preprocessor/tokenizer_config.yaml CHANGED
@@ -7,22 +7,14 @@ stride: 0
7
  padding_strategy: longest
8
  padding_direction: right
9
  pad_to_multiple_of: 0
10
- pad_token_id: 0
11
- pad_token: <pad>
12
  pad_token_type_id: 0
 
 
13
  unk_token: <unk>
14
- special_tokens:
15
- - <s>
16
- - <pad>
17
- - </s>
18
- - <unk>
19
- - <mask>
20
- - <|endoftext|>
21
- - <|startoftext|>
22
- - <nl>
23
- - <hs>
24
- - <sep>
25
- - <cls>
26
  continuing_subword_prefix: ''
27
  end_of_word_suffix: ''
28
  fuse_unk: false
 
7
  padding_strategy: longest
8
  padding_direction: right
9
  pad_to_multiple_of: 0
 
 
10
  pad_token_type_id: 0
11
+ bos_token: <s>
12
+ eos_token: </s>
13
  unk_token: <unk>
14
+ sep_token: <sep>
15
+ pad_token: <pad>
16
+ cls_token: <cls>
17
+ mask_token: <mask>
 
 
 
 
 
 
 
 
18
  continuing_subword_prefix: ''
19
  end_of_word_suffix: ''
20
  fuse_unk: false