balaguru.s committed
Commit 550af80
1 Parent(s): fad03c5

added missing tokenizers

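The four tokenizer files added here (vocab.json, merges.txt, special_tokens_map.json, tokenizer_config.json) are the standard output of save_pretrained on the stock GPT-2 tokenizer. A minimal sketch of how files like these are typically produced (illustrative, not necessarily the exact commands used for this commit):

from transformers import GPT2Tokenizer

# Pull the stock GPT-2 BPE tokenizer from the Hub ("gpt2" supplies vocab.json and merges.txt).
tokenizer = GPT2Tokenizer.from_pretrained("gpt2")

# save_pretrained writes vocab.json, merges.txt, special_tokens_map.json and
# tokenizer_config.json, the same files added in this commit.
tokenizer.save_pretrained("Checkpoints_1_6_M/gpt2-python-language-model/checkpoint-5130")
tokenizer.save_pretrained(".")  # copy for the repository root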
Checkpoints_1_6_M/gpt2-python-language-model/checkpoint-5130/config.json CHANGED
@@ -12,7 +12,6 @@
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
- "tokenizer_name" : "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
Checkpoints_1_6_M/gpt2-python-language-model/checkpoint-5130/merges.txt ADDED
The diff for this file is too large to render. See raw diff
Checkpoints_1_6_M/gpt2-python-language-model/checkpoint-5130/special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
Checkpoints_1_6_M/gpt2-python-language-model/checkpoint-5130/tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 1024, "name_or_path": "gpt2"}
Checkpoints_1_6_M/gpt2-python-language-model/checkpoint-5130/vocab.json ADDED
The diff for this file is too large to render. See raw diff
config.json CHANGED
@@ -12,7 +12,6 @@
  "initializer_range": 0.02,
  "layer_norm_epsilon": 1e-05,
  "model_type": "gpt2",
- "tokenizer_name" : "gpt2",
  "n_ctx": 1024,
  "n_embd": 768,
  "n_head": 12,
merges.txt ADDED
The diff for this file is too large to render. See raw diff
special_tokens_map.json ADDED
@@ -0,0 +1 @@
+ {"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
+ {"errors": "replace", "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "model_max_length": 1024, "name_or_path": "gpt2"}
vocab.json ADDED
The diff for this file is too large to render. See raw diff
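With the tokenizer files also at the repository root, model and tokenizer load from one place. An end-to-end usage sketch, assuming the root also holds the trained weights and that the model targets Python source as the repo name suggests; the prompt is purely illustrative:

import torch
from transformers import AutoTokenizer, GPT2LMHeadModel

tokenizer = AutoTokenizer.from_pretrained(".")   # or the Hub repo id of this model
model = GPT2LMHeadModel.from_pretrained(".")
model.eval()

inputs = tokenizer("def fibonacci(n):", return_tensors="pt")
with torch.no_grad():
    out = model.generate(**inputs, max_new_tokens=48, pad_token_id=tokenizer.eos_token_id)
print(tokenizer.decode(out[0], skip_special_tokens=True))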