hails committed
Commit f60b41d
Parent: f7ee0bd

hotfix tokenizer

pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:172dadf7cc6a2124dbe9269601bd554696c2dbffe1c4e4681e24c8c55414e4fb
+oid sha256:caaa2623fb640123e81dadf1857139eba6e6b84be52232b084a5ebbce119f6c3
 size 2854581673
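Only the Git LFS pointer changed here: a new sha256 oid, same byte size. A minimal sketch (not part of the commit) for checking a downloaded pytorch_model.bin against the new oid, assuming the file sits in the working directory:

import hashlib

EXPECTED_OID = "caaa2623fb640123e81dadf1857139eba6e6b84be52232b084a5ebbce119f6c3"

def sha256_of(path, chunk_size=1 << 20):
    # Stream the ~2.8 GB file in 1 MiB chunks instead of reading it whole.
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

assert sha256_of("pytorch_model.bin") == EXPECTED_OID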
special_tokens_map.json CHANGED
@@ -1,5 +1,6 @@
 {
   "bos_token": "<|endoftext|>",
   "eos_token": "<|endoftext|>",
-  "unk_token": "<|endoftext|>"
+  "unk_token": "<|endoftext|>",
+  "pad_token": "<|padding|>"
 }
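With "pad_token" now mapped to "<|padding|>", the tokenizer can pad batches directly. A minimal sketch, assuming a Hugging Face fast tokenizer loaded from this repo (the model id below is a placeholder, not taken from the commit):

from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("your-org/this-checkpoint")  # placeholder id

# Without a pad token, padding=True raises "Asking to pad but the
# tokenizer does not have a padding token."; after this commit it works.
batch = tok(["short", "a somewhat longer input"], padding=True, return_tensors="pt")
print(tok.pad_token, tok.pad_token_id)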
tokenizer.json CHANGED
@@ -227,6 +227,33 @@
       "rstrip": false,
       "normalized": true,
       "special": false
+    },
+    {
+      "id": 50277,
+      "content": "<|SUF|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 50278,
+      "content": "<|PRE|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
+    },
+    {
+      "id": 50279,
+      "content": "<|MID|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
     }
   ],
   "normalizer": {
tokenizer_config.json CHANGED
@@ -2,7 +2,7 @@
   "add_prefix_space": false,
   "bos_token": "<|endoftext|>",
   "eos_token": "<|endoftext|>",
-  "name_or_path": "EleutherAI/gpt-neox-20b",
+  "name_or_path": "/fsx/code-fim/FIM-clean/gpt-neox/newtokenizer",
   "special_tokens_map_file": "/fsx/home-hailey/.cache/huggingface/hub/models--EleutherAI--gpt-neox-20b/snapshots/3523781c8df75f7741687a4284f6f70e1afa12f4/special_tokens_map.json",
   "tokenizer_class": "GPTNeoXTokenizer",
   "unk_token": "<|endoftext|>"