J38 committed on
Commit
9ccd482
1 Parent(s): aacf045

tokenizer files

Browse files
Files changed (4) hide show
  1. merges.txt +0 -0
  2. tokenizer.json +0 -0
  3. tokenizer_config.json +1 -0
  4. vocab.json +0 -0
merges.txt ADDED
The diff for this file is too large to render. See raw diff
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
1
+ {"add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "stanford-crfm/pubmed_gpt_tokenizer", "tokenizer_class": "GPT2Tokenizer", "unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff