gugarosa committed
Commit
d5dd948
1 parent: b9c2b13

commit files to HF hub

Files changed (45)
  1. gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd/config.json +36 -0
  2. gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd/pytorch_model.bin +3 -0
  3. gpt2_0e8c86e6babd924ff8b511c94cc1647bf61f81a2/config.json +36 -0
  4. gpt2_0e8c86e6babd924ff8b511c94cc1647bf61f81a2/pytorch_model.bin +3 -0
  5. gpt2_131845381012a68c3a358514fdffc12b09db1ed8/config.json +36 -0
  6. gpt2_131845381012a68c3a358514fdffc12b09db1ed8/pytorch_model.bin +3 -0
  7. gpt2_1e9d92f0fed7288facc68cb448863e8120ccca9c/config.json +36 -0
  8. gpt2_1e9d92f0fed7288facc68cb448863e8120ccca9c/pytorch_model.bin +3 -0
  9. gpt2_39563367097004cfd771d76d8822e51ad79b56d6/config.json +36 -0
  10. gpt2_39563367097004cfd771d76d8822e51ad79b56d6/pytorch_model.bin +3 -0
  11. gpt2_3b30c85ac08c6b12b0ea46cb832270ba52b7fcd8/config.json +36 -0
  12. gpt2_3b30c85ac08c6b12b0ea46cb832270ba52b7fcd8/pytorch_model.bin +3 -0
  13. gpt2_4352a56f3fa9e7ba6d291867d356a08022753658/config.json +36 -0
  14. gpt2_4352a56f3fa9e7ba6d291867d356a08022753658/pytorch_model.bin +3 -0
  15. gpt2_46e7c68a025417e20a7e13bd4c1ee71438d28069/config.json +36 -0
  16. gpt2_46e7c68a025417e20a7e13bd4c1ee71438d28069/pytorch_model.bin +3 -0
  17. gpt2_538d4b101df48595a935d90dbf4a7fb2ac09ac01/config.json +36 -0
  18. gpt2_538d4b101df48595a935d90dbf4a7fb2ac09ac01/pytorch_model.bin +3 -0
  19. gpt2_5fea22df661ad91676709da7a334505f15765659/config.json +36 -0
  20. gpt2_5fea22df661ad91676709da7a334505f15765659/pytorch_model.bin +3 -0
  21. gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3/config.json +36 -0
  22. gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3/pytorch_model.bin +3 -0
  23. gpt2_80fabe4acddff0dc796e287588e40d86e79df4b2/config.json +36 -0
  24. gpt2_80fabe4acddff0dc796e287588e40d86e79df4b2/pytorch_model.bin +3 -0
  25. gpt2_8f5159304179c77ecdc69c953b71a3f8fa528564/config.json +36 -0
  26. gpt2_8f5159304179c77ecdc69c953b71a3f8fa528564/pytorch_model.bin +3 -0
  27. gpt2_90682823835acabd965294775983a1d5a2c2fa43/config.json +36 -0
  28. gpt2_90682823835acabd965294775983a1d5a2c2fa43/pytorch_model.bin +3 -0
  29. gpt2_917c2f9601a1c29d1f280bb172015e5fb210b6b3/config.json +36 -0
  30. gpt2_917c2f9601a1c29d1f280bb172015e5fb210b6b3/pytorch_model.bin +3 -0
  31. gpt2_98b0196b5a865ba76f31723646f33e0461dc910d/config.json +36 -0
  32. gpt2_98b0196b5a865ba76f31723646f33e0461dc910d/pytorch_model.bin +3 -0
  33. gpt2_a9e3147996070fda25af4b39ed95b6a18d6d0402/config.json +36 -0
  34. gpt2_a9e3147996070fda25af4b39ed95b6a18d6d0402/pytorch_model.bin +3 -0
  35. gpt2_c679fa01f00dd6f584614c6d9784eb233b047283/config.json +36 -0
  36. gpt2_c679fa01f00dd6f584614c6d9784eb233b047283/pytorch_model.bin +3 -0
  37. gpt2_c76bdddb5cf59275711672daa5b8c70e6c78bf4e/config.json +36 -0
  38. gpt2_c76bdddb5cf59275711672daa5b8c70e6c78bf4e/pytorch_model.bin +3 -0
  39. gpt2_ddf63c1125f1fed5a7dd3537f640834187719996/config.json +36 -0
  40. gpt2_ddf63c1125f1fed5a7dd3537f640834187719996/pytorch_model.bin +3 -0
  41. merges.txt +0 -0
  42. special_tokens_map.json +5 -0
  43. tokenizer.json +0 -0
  44. tokenizer_config.json +10 -0
  45. vocab.json +0 -0
gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 832,
+   "d_model": 576,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 8,
+   "n_inner": null,
+   "n_layer": 7,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
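
Each gpt2_<hash> folder added in this commit is a self-contained checkpoint that transformers can load via the subfolder argument. A minimal sketch, assuming a hypothetical repo id (the commit itself does not name the repository); note that keys such as "arch_type", "d_inner", "d_model", and "dropatt" are not standard GPT2Config fields and are simply carried along as extra config attributes when parsed:

from transformers import GPT2LMHeadModel, GPT2Tokenizer

REPO_ID = "your-org/your-repo"  # assumption: the Hub repo this commit was pushed to
SUBFOLDER = "gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd"

# Loads config.json + pytorch_model.bin from the checkpoint's subfolder.
model = GPT2LMHeadModel.from_pretrained(REPO_ID, subfolder=SUBFOLDER)
# The tokenizer files (vocab.json, merges.txt, ...) live at the repo root.
tokenizer = GPT2Tokenizer.from_pretrained(REPO_ID)

print(model.config.n_layer, model.config.n_head)  # 7 and 8 for this checkpoint
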
gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f371dbf7ee8293347e9aeaac442b49de4d714444d50895e4b57accb4ada15e2
+ size 363374757
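
Every pytorch_model.bin in this commit is stored as a git-lfs pointer rather than the raw weights: three text lines giving the spec version, the sha256 digest of the real file, and its size in bytes (~363 MB for this checkpoint). A minimal sketch of parsing such a pointer, for illustration only:

def parse_lfs_pointer(text: str) -> dict:
    """Split the 'key value' lines of a git-lfs pointer file into a dict."""
    return dict(line.split(" ", 1) for line in text.strip().splitlines())

pointer = (
    "version https://git-lfs.github.com/spec/v1\n"
    "oid sha256:4f371dbf7ee8293347e9aeaac442b49de4d714444d50895e4b57accb4ada15e2\n"
    "size 363374757"
)
info = parse_lfs_pointer(pointer)
print(info["oid"], int(info["size"]))  # digest and byte size of the actual weights
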
gpt2_0e8c86e6babd924ff8b511c94cc1647bf61f81a2/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 768,
+   "d_model": 768,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 7,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_0e8c86e6babd924ff8b511c94cc1647bf61f81a2/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:201305736c097b60209d5d891b6d787234b3cc8fdbe87dea224ca5c3c3fd8555
+ size 363374757
gpt2_131845381012a68c3a358514fdffc12b09db1ed8/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 896,
+   "d_model": 448,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_131845381012a68c3a358514fdffc12b09db1ed8/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:57b4d170b992148f04adc0e8411de473f60db86fb8cf1569a429053af3ce97e3
+ size 275160933
gpt2_1e9d92f0fed7288facc68cb448863e8120ccca9c/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 448,
+   "d_model": 704,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 8,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_1e9d92f0fed7288facc68cb448863e8120ccca9c/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4b4a9492f86430eafed9f6a93a1267228c727e6ef8117b4cbd9f520dc079c3f3
+ size 392779421
gpt2_39563367097004cfd771d76d8822e51ad79b56d6/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 192,
+   "d_model": 768,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_39563367097004cfd771d76d8822e51ad79b56d6/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3200993b0cffcf428755f05eee1219b5f282869de34617266de3bbebbea1f884
+ size 275160933
gpt2_3b30c85ac08c6b12b0ea46cb832270ba52b7fcd8/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 512,
+   "d_model": 704,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 7,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_3b30c85ac08c6b12b0ea46cb832270ba52b7fcd8/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d2192254dfcb7d5eac05e15a861d3222c9e844c4be41b45858d6ecad4bb366b
+ size 363374757
gpt2_4352a56f3fa9e7ba6d291867d356a08022753658/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 704,
+   "d_model": 896,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 8,
+   "n_inner": null,
+   "n_layer": 9,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_4352a56f3fa9e7ba6d291867d356a08022753658/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9d09deaa47c6238164aef588b61edd66850d1751688a7cab9d530985066d1fe6
+ size 422184085
gpt2_46e7c68a025417e20a7e13bd4c1ee71438d28069/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 960,
+   "d_model": 704,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 9,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_46e7c68a025417e20a7e13bd4c1ee71438d28069/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7fbbe1aeac710b4f5a87e17bd2f6554c76930eec733ccc454e722f9d7a3bceed
+ size 422184085
gpt2_538d4b101df48595a935d90dbf4a7fb2ac09ac01/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 640,
+   "d_model": 320,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 10,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_538d4b101df48595a935d90dbf4a7fb2ac09ac01/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f15358bb885c195d9fd15f899edabd622749de83f4f60f347ced4324d60946b3
+ size 451588685
gpt2_5fea22df661ad91676709da7a334505f15765659/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 960,
+   "d_model": 768,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 8,
+   "n_inner": null,
+   "n_layer": 7,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_5fea22df661ad91676709da7a334505f15765659/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:636e68e4de0d8a194245645be90901740b88d440d1abc992147c4d61bed84acc
+ size 363374757
gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 960,
+   "d_model": 960,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 9,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f4d5ccbebb69308c8b27206f2af6eeb7e75a5f85d76e3dc32a225eeef4d45037
+ size 422184085
gpt2_80fabe4acddff0dc796e287588e40d86e79df4b2/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 960,
+   "d_model": 192,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 4,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_80fabe4acddff0dc796e287588e40d86e79df4b2/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cf47a547664c9c6b9ee956715b8a4f3e45daee3d44acf27b06b1ba8e1a0afe06
+ size 275160933
gpt2_8f5159304179c77ecdc69c953b71a3f8fa528564/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 896,
+   "d_model": 384,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 4,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_8f5159304179c77ecdc69c953b71a3f8fa528564/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:542be96e76d2fe7da4dce56e9cd5a531f4ad4fd9f28041a8e29b2aa8033e60ee
+ size 275160933
gpt2_90682823835acabd965294775983a1d5a2c2fa43/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 576,
+   "d_model": 320,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_90682823835acabd965294775983a1d5a2c2fa43/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4fde74639eba8e8e9ac5319918623ae23d12f13aa984905348921ca205f596f6
+ size 275160933
gpt2_917c2f9601a1c29d1f280bb172015e5fb210b6b3/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 640,
+   "d_model": 320,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 2,
+   "n_inner": null,
+   "n_layer": 9,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_917c2f9601a1c29d1f280bb172015e5fb210b6b3/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:78d6c637198e4944b8e8d7f359d3f647f1a64bcf0f4a357459babdb3753b3117
+ size 422184085
gpt2_98b0196b5a865ba76f31723646f33e0461dc910d/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 960,
+   "d_model": 960,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 4,
+   "n_inner": null,
+   "n_layer": 6,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_98b0196b5a865ba76f31723646f33e0461dc910d/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02c7a7c2a35344e7489cca476b743b04ae6bfdd2042434336640b632755cb950
+ size 333970169
gpt2_a9e3147996070fda25af4b39ed95b6a18d6d0402/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 768,
+   "d_model": 128,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 4,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_a9e3147996070fda25af4b39ed95b6a18d6d0402/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d09cb028b47c592a6b06b0a8086c32cf947830d8d200afd431a8f6f70c6f8297
+ size 275160933
gpt2_c679fa01f00dd6f584614c6d9784eb233b047283/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 896,
+   "d_model": 576,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 8,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_c679fa01f00dd6f584614c6d9784eb233b047283/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ad2cc5a9f7c872e107d4f4dc856f1137c86d29da1f02398bc2a9f0c3885e3759
+ size 275160933
gpt2_c76bdddb5cf59275711672daa5b8c70e6c78bf4e/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 896,
+   "d_model": 320,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 4,
+   "n_inner": null,
+   "n_layer": 4,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_c76bdddb5cf59275711672daa5b8c70e6c78bf4e/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:11e78f94a56a38646f828b4745149ae528891969b412920eb7d76245cae47da9
+ size 275160933
gpt2_ddf63c1125f1fed5a7dd3537f640834187719996/config.json ADDED
@@ -0,0 +1,36 @@
+ {
+   "activation_function": "gelu_new",
+   "arch_type": "gpt2",
+   "architectures": [
+     "GPT2LMHeadModel"
+   ],
+   "attn_pdrop": 0.1,
+   "bos_token_id": 0,
+   "d_inner": 960,
+   "d_model": 384,
+   "dropatt": 0.0,
+   "embd_pdrop": 0.1,
+   "eos_token_id": 0,
+   "initializer_range": 0.02,
+   "layer_norm_epsilon": 1e-05,
+   "max_sequence_length": 1024,
+   "model_type": "gpt2",
+   "n_embd": 768,
+   "n_head": 4,
+   "n_inner": null,
+   "n_layer": 10,
+   "n_positions": 1024,
+   "reorder_and_upcast_attn": false,
+   "resid_pdrop": 0.1,
+   "scale_attn_by_inverse_layer_idx": false,
+   "scale_attn_weights": true,
+   "summary_activation": null,
+   "summary_first_dropout": 0.1,
+   "summary_proj_to_labels": true,
+   "summary_type": "cls_index",
+   "summary_use_proj": true,
+   "torch_dtype": "float32",
+   "transformers_version": "4.26.1",
+   "use_cache": true,
+   "vocab_size": 50257
+ }
gpt2_ddf63c1125f1fed5a7dd3537f640834187719996/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4db51b31f79a74a1bfe6f1601646fe5db575b6c44e2811cb427fa48a02938e00
+ size 451588685
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
+ {
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "unk_token": "<|endoftext|>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,10 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<|endoftext|>",
+   "eos_token": "<|endoftext|>",
+   "model_max_length": 1024,
+   "name_or_path": "gpt2",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "GPT2Tokenizer",
+   "unk_token": "<|endoftext|>"
+ }
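
The tokenizer files at the repo root (vocab.json, merges.txt, tokenizer.json, special_tokens_map.json, and this tokenizer_config.json) are shared by all twenty checkpoints, which is why each gpt2_<hash> subfolder only carries a config.json and weights. A minimal sketch, again with a hypothetical repo id:

from transformers import GPT2Tokenizer

tokenizer = GPT2Tokenizer.from_pretrained("your-org/your-repo")  # assumption: repo id
# All three special tokens map to the same GPT-2 end-of-text token,
# per special_tokens_map.json above.
assert tokenizer.bos_token == tokenizer.eos_token == tokenizer.unk_token == "<|endoftext|>"
print(tokenizer.model_max_length)  # 1024, matching n_positions in every config.json
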
vocab.json ADDED
The diff for this file is too large to render. See raw diff