J38 committed on
Commit
39545d2
1 Parent(s): 9d29e9b

50k vocab, add_prefix_space=false, trained on PubMed Abstracts

Browse files
Files changed (4) hide show
  1. merges.txt +0 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +1 -1
  4. vocab.json +0 -0
merges.txt CHANGED
The diff for this file is too large to render. See raw diff
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"add_prefix_space": true, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "stanford-crfm/pubmed_gpt_tokenizer", "tokenizer_class": "GPT2Tokenizer", "normalizer": "Lowercase", "unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>"}
1
+ {"add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "stanford-crfm/pubmed_gpt_tokenizer", "tokenizer_class": "GPT2Tokenizer", "unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>"}
vocab.json CHANGED
The diff for this file is too large to render. See raw diff