puri committed on
Commit
2adc97b
1 Parent(s): e74f2d7

change tokenizer configuration

Browse files
Files changed (2) hide show
  1. .gitattributes +2 -0
  2. tokenizer_config.json +1 -1
.gitattributes CHANGED
@@ -6,3 +6,5 @@
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
 
 
6
  *.tar.gz filter=lfs diff=lfs merge=lfs -text
7
  *.ot filter=lfs diff=lfs merge=lfs -text
8
  *.onnx filter=lfs diff=lfs merge=lfs -text
9
+ pytorch_model.bin filter=lfs diff=lfs merge=lfs -text
10
+ tf_model.h5 filter=lfs diff=lfs merge=lfs -text
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"do_lower_case": true, "remove_space": true, "keep_accents": false, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": "[MASK]", "max_len": 512, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "/root/workspace/thesis/tokenizer_v2/"}
1
+ {"do_lower_case": false, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": "[MASK]", "max_len": 512, "special_tokens_map_file": null, "tokenizer_file": null, "name_or_path": "/root/workspace/thesis/tokenizer_v2/"}