af1tang committed on
Commit 7332f20
1 Parent(s): 521ee1d

tokenizer update

added_tokens.json CHANGED
@@ -1 +1 @@
-{"<|sep|>": 50257, "<|cls|>": 50258, "<|start|>": 50259, "<|p1|>": 50260, "<|p2|>": 50261}
+{"<|sep|>": 50257, "<|act|>": 50262, "<|cls|>": 50258, "<|start|>": 50259, "<|p1|>": 50260, "<|p2|>": 50261}
config.json CHANGED
@@ -1,4 +1,5 @@
 {
+  "_name_or_path": "af1tang/personaGPT",
   "activation_function": "gelu_new",
   "architectures": [
     "GPT2LMHeadModel"
@@ -7,19 +8,25 @@
   "bos_token_id": 50256,
   "embd_pdrop": 0.1,
   "eos_token_id": 50256,
+  "gradient_checkpointing": false,
   "initializer_range": 0.02,
   "layer_norm_epsilon": 1e-05,
   "model_type": "gpt2",
   "n_ctx": 1024,
   "n_embd": 1024,
   "n_head": 16,
+  "n_inner": null,
   "n_layer": 24,
   "n_positions": 1024,
   "resid_pdrop": 0.1,
+  "scale_attn_weights": true,
   "summary_activation": null,
   "summary_first_dropout": 0.1,
   "summary_proj_to_labels": true,
   "summary_type": "cls_index",
   "summary_use_proj": true,
-  "vocab_size": 50262
+  "torch_dtype": "float32",
+  "transformers_version": "4.10.0",
+  "use_cache": true,
+  "vocab_size": 50263
 }
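
The vocab_size bump from 50262 to 50263 is plain arithmetic: 50257 base GPT-2 entries plus the six special tokens in added_tokens.json (the old value only covered the original five). The hyperparameters (n_embd 1024, n_head 16, n_layer 24) identify a GPT2-medium backbone. A hedged sketch of keeping the model in sync with the grown tokenizer, continuing from the snippet above (using gpt2-medium as the base checkpoint is an assumption for illustration):

    from transformers import GPT2LMHeadModel

    # n_embd=1024 / n_head=16 / n_layer=24 matches gpt2-medium.
    model = GPT2LMHeadModel.from_pretrained("gpt2-medium")

    # Grow the (tied) embedding matrix so every tokenizer ID has a row;
    # this is what puts the new "vocab_size": 50263 into config.json.
    model.resize_token_embeddings(len(tokenizer))  # 50257 + 6 = 50263
    assert model.config.vocab_size == 50263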
merges.txt CHANGED
@@ -1,4 +1,4 @@
-#version: 0.2
+#version: 0.2 - Trained by `huggingface/tokenizers`
 Ġ t
 Ġ a
 h e
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7582d23d2d19e37339c15685b171dce5e6ea98787a82b40ff3c4ad8011a87a6
-size 1444551925
+oid sha256:7397c2e647a7d4e328b67f4d5cac13cb32713a6282a94e41db2ed44782aad2e3
+size 1444556021
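
The checkpoint grows by exactly 4096 bytes, which is consistent with a single new embedding row: one token times n_embd (1024) float32 values times 4 bytes. Because GPT-2 ties the input embedding and LM head weights, only one matrix gains a row. A quick check:

    # Checkpoint sizes from the LFS pointers above.
    old_size, new_size = 1444551925, 1444556021

    # One new token row in a tied fp32 embedding of width n_embd=1024.
    n_embd, fp32_bytes = 1024, 4
    assert new_size - old_size == n_embd * fp32_bytes  # 4096 bytes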
special_tokens_map.json CHANGED
@@ -1 +1 @@
-{"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": "<|sep|>", "pad_token": "<|endoftext|>", "cls_token": "<|cls|>", "additional_special_tokens": ["<|start|>", "<|p1|>", "<|p2|>"]}
+{"bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true}, "sep_token": "<|sep|>", "pad_token": "<|endoftext|>", "cls_token": "<|cls|>", "additional_special_tokens": ["<|start|>", "<|p1|>", "<|p2|>", "<|act|>"]}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
-{"pad_token": "<|endoftext|>", "cls_token": "<|cls|>", "sep_token": "<|sep|>", "special_tokens_map_file": null, "full_tokenizer_file": null}
+{"unk_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "bos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "eos_token": {"content": "<|endoftext|>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "add_prefix_space": false, "pad_token": "<|endoftext|>", "cls_token": "<|cls|>", "sep_token": "<|sep|>", "special_tokens_map_file": null, "full_tokenizer_file": null, "name_or_path": "af1tang/personaGPT", "errors": "replace", "tokenizer_class": "GPT2Tokenizer"}
vocab.json CHANGED
The diff for this file is too large to render. See raw diff