thomasw21
commited on
Commit
·
8943cc8
1
Parent(s):
b2ad2b6
Add tokenizer
Browse files- .gitattributes +1 -0
- special_tokens_map.json +1 -0
- tokenizer.json +3 -0
- tokenizer_config.json +1 -0
.gitattributes
CHANGED
@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
25 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
26 |
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
27 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
28 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": "<s>", "eos_token": "</s>", "unk_token": "<unk>", "pad_token": "<pad>"}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3fa39cd4b1500feb205bcce3b9703a4373414cafe4970e0657b413f7ddd2a9d3
|
3 |
+
size 14500438
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"unk_token": "<unk>", "eos_token": "</s>", "bos_token": "<s>", "pad_token": "<pad>", "special_tokens_map_file": "/Users/thomas/.cache/huggingface/transformers/9b8b2f4cb97dda0753c9b7213ca10bae9674703a4c64f786341b96a260d44985.9d6cd81ef646692fb1c169a880161ea1cb95f49694f220aced9b704b457e51dd", "name_or_path": "bigscience-catalogue-data-dev/byte-level-bpe-tokenizer-no-norm-250k-whitespace-and-eos-regex-alpha-v3-dedup-lines-articles", "tokenizer_class": "PreTrainedTokenizerFast"}
|