commit files to HF hub
Browse files- gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd/config.json +36 -0
- gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd/pytorch_model.bin +3 -0
- gpt2_0e8c86e6babd924ff8b511c94cc1647bf61f81a2/config.json +36 -0
- gpt2_0e8c86e6babd924ff8b511c94cc1647bf61f81a2/pytorch_model.bin +3 -0
- gpt2_131845381012a68c3a358514fdffc12b09db1ed8/config.json +36 -0
- gpt2_131845381012a68c3a358514fdffc12b09db1ed8/pytorch_model.bin +3 -0
- gpt2_1e9d92f0fed7288facc68cb448863e8120ccca9c/config.json +36 -0
- gpt2_1e9d92f0fed7288facc68cb448863e8120ccca9c/pytorch_model.bin +3 -0
- gpt2_39563367097004cfd771d76d8822e51ad79b56d6/config.json +36 -0
- gpt2_39563367097004cfd771d76d8822e51ad79b56d6/pytorch_model.bin +3 -0
- gpt2_3b30c85ac08c6b12b0ea46cb832270ba52b7fcd8/config.json +36 -0
- gpt2_3b30c85ac08c6b12b0ea46cb832270ba52b7fcd8/pytorch_model.bin +3 -0
- gpt2_4352a56f3fa9e7ba6d291867d356a08022753658/config.json +36 -0
- gpt2_4352a56f3fa9e7ba6d291867d356a08022753658/pytorch_model.bin +3 -0
- gpt2_46e7c68a025417e20a7e13bd4c1ee71438d28069/config.json +36 -0
- gpt2_46e7c68a025417e20a7e13bd4c1ee71438d28069/pytorch_model.bin +3 -0
- gpt2_538d4b101df48595a935d90dbf4a7fb2ac09ac01/config.json +36 -0
- gpt2_538d4b101df48595a935d90dbf4a7fb2ac09ac01/pytorch_model.bin +3 -0
- gpt2_5fea22df661ad91676709da7a334505f15765659/config.json +36 -0
- gpt2_5fea22df661ad91676709da7a334505f15765659/pytorch_model.bin +3 -0
- gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3/config.json +36 -0
- gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3/pytorch_model.bin +3 -0
- gpt2_80fabe4acddff0dc796e287588e40d86e79df4b2/config.json +36 -0
- gpt2_80fabe4acddff0dc796e287588e40d86e79df4b2/pytorch_model.bin +3 -0
- gpt2_8f5159304179c77ecdc69c953b71a3f8fa528564/config.json +36 -0
- gpt2_8f5159304179c77ecdc69c953b71a3f8fa528564/pytorch_model.bin +3 -0
- gpt2_90682823835acabd965294775983a1d5a2c2fa43/config.json +36 -0
- gpt2_90682823835acabd965294775983a1d5a2c2fa43/pytorch_model.bin +3 -0
- gpt2_917c2f9601a1c29d1f280bb172015e5fb210b6b3/config.json +36 -0
- gpt2_917c2f9601a1c29d1f280bb172015e5fb210b6b3/pytorch_model.bin +3 -0
- gpt2_98b0196b5a865ba76f31723646f33e0461dc910d/config.json +36 -0
- gpt2_98b0196b5a865ba76f31723646f33e0461dc910d/pytorch_model.bin +3 -0
- gpt2_a9e3147996070fda25af4b39ed95b6a18d6d0402/config.json +36 -0
- gpt2_a9e3147996070fda25af4b39ed95b6a18d6d0402/pytorch_model.bin +3 -0
- gpt2_c679fa01f00dd6f584614c6d9784eb233b047283/config.json +36 -0
- gpt2_c679fa01f00dd6f584614c6d9784eb233b047283/pytorch_model.bin +3 -0
- gpt2_c76bdddb5cf59275711672daa5b8c70e6c78bf4e/config.json +36 -0
- gpt2_c76bdddb5cf59275711672daa5b8c70e6c78bf4e/pytorch_model.bin +3 -0
- gpt2_ddf63c1125f1fed5a7dd3537f640834187719996/config.json +36 -0
- gpt2_ddf63c1125f1fed5a7dd3537f640834187719996/pytorch_model.bin +3 -0
- merges.txt +0 -0
- special_tokens_map.json +5 -0
- tokenizer.json +0 -0
- tokenizer_config.json +10 -0
- vocab.json +0 -0
gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 832,
|
10 |
+
"d_model": 576,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 8,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 7,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_0e1b5a3c867d6473da270799061f3089a1df5afd/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f371dbf7ee8293347e9aeaac442b49de4d714444d50895e4b57accb4ada15e2
|
3 |
+
size 363374757
|
gpt2_0e8c86e6babd924ff8b511c94cc1647bf61f81a2/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 768,
|
10 |
+
"d_model": 768,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 7,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_0e8c86e6babd924ff8b511c94cc1647bf61f81a2/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:201305736c097b60209d5d891b6d787234b3cc8fdbe87dea224ca5c3c3fd8555
|
3 |
+
size 363374757
|
gpt2_131845381012a68c3a358514fdffc12b09db1ed8/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 896,
|
10 |
+
"d_model": 448,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 4,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_131845381012a68c3a358514fdffc12b09db1ed8/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:57b4d170b992148f04adc0e8411de473f60db86fb8cf1569a429053af3ce97e3
|
3 |
+
size 275160933
|
gpt2_1e9d92f0fed7288facc68cb448863e8120ccca9c/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 448,
|
10 |
+
"d_model": 704,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 8,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_1e9d92f0fed7288facc68cb448863e8120ccca9c/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b4a9492f86430eafed9f6a93a1267228c727e6ef8117b4cbd9f520dc079c3f3
|
3 |
+
size 392779421
|
gpt2_39563367097004cfd771d76d8822e51ad79b56d6/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 192,
|
10 |
+
"d_model": 768,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 4,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_39563367097004cfd771d76d8822e51ad79b56d6/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3200993b0cffcf428755f05eee1219b5f282869de34617266de3bbebbea1f884
|
3 |
+
size 275160933
|
gpt2_3b30c85ac08c6b12b0ea46cb832270ba52b7fcd8/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 512,
|
10 |
+
"d_model": 704,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 7,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_3b30c85ac08c6b12b0ea46cb832270ba52b7fcd8/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d2192254dfcb7d5eac05e15a861d3222c9e844c4be41b45858d6ecad4bb366b
|
3 |
+
size 363374757
|
gpt2_4352a56f3fa9e7ba6d291867d356a08022753658/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 704,
|
10 |
+
"d_model": 896,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 8,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 9,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_4352a56f3fa9e7ba6d291867d356a08022753658/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d09deaa47c6238164aef588b61edd66850d1751688a7cab9d530985066d1fe6
|
3 |
+
size 422184085
|
gpt2_46e7c68a025417e20a7e13bd4c1ee71438d28069/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 960,
|
10 |
+
"d_model": 704,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 9,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_46e7c68a025417e20a7e13bd4c1ee71438d28069/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7fbbe1aeac710b4f5a87e17bd2f6554c76930eec733ccc454e722f9d7a3bceed
|
3 |
+
size 422184085
|
gpt2_538d4b101df48595a935d90dbf4a7fb2ac09ac01/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 640,
|
10 |
+
"d_model": 320,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 10,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_538d4b101df48595a935d90dbf4a7fb2ac09ac01/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f15358bb885c195d9fd15f899edabd622749de83f4f60f347ced4324d60946b3
|
3 |
+
size 451588685
|
gpt2_5fea22df661ad91676709da7a334505f15765659/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 960,
|
10 |
+
"d_model": 768,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 8,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 7,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_5fea22df661ad91676709da7a334505f15765659/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:636e68e4de0d8a194245645be90901740b88d440d1abc992147c4d61bed84acc
|
3 |
+
size 363374757
|
gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 960,
|
10 |
+
"d_model": 960,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 9,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_6c6e63116ff74ba444ff5a08cef54380073ebea3/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f4d5ccbebb69308c8b27206f2af6eeb7e75a5f85d76e3dc32a225eeef4d45037
|
3 |
+
size 422184085
|
gpt2_80fabe4acddff0dc796e287588e40d86e79df4b2/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 960,
|
10 |
+
"d_model": 192,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 4,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 4,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_80fabe4acddff0dc796e287588e40d86e79df4b2/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf47a547664c9c6b9ee956715b8a4f3e45daee3d44acf27b06b1ba8e1a0afe06
|
3 |
+
size 275160933
|
gpt2_8f5159304179c77ecdc69c953b71a3f8fa528564/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 896,
|
10 |
+
"d_model": 384,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 4,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 4,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_8f5159304179c77ecdc69c953b71a3f8fa528564/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:542be96e76d2fe7da4dce56e9cd5a531f4ad4fd9f28041a8e29b2aa8033e60ee
|
3 |
+
size 275160933
|
gpt2_90682823835acabd965294775983a1d5a2c2fa43/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 576,
|
10 |
+
"d_model": 320,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 4,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_90682823835acabd965294775983a1d5a2c2fa43/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4fde74639eba8e8e9ac5319918623ae23d12f13aa984905348921ca205f596f6
|
3 |
+
size 275160933
|
gpt2_917c2f9601a1c29d1f280bb172015e5fb210b6b3/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 640,
|
10 |
+
"d_model": 320,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 2,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 9,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_917c2f9601a1c29d1f280bb172015e5fb210b6b3/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:78d6c637198e4944b8e8d7f359d3f647f1a64bcf0f4a357459babdb3753b3117
|
3 |
+
size 422184085
|
gpt2_98b0196b5a865ba76f31723646f33e0461dc910d/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 960,
|
10 |
+
"d_model": 960,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 4,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 6,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_98b0196b5a865ba76f31723646f33e0461dc910d/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:02c7a7c2a35344e7489cca476b743b04ae6bfdd2042434336640b632755cb950
|
3 |
+
size 333970169
|
gpt2_a9e3147996070fda25af4b39ed95b6a18d6d0402/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 768,
|
10 |
+
"d_model": 128,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 4,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 4,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_a9e3147996070fda25af4b39ed95b6a18d6d0402/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d09cb028b47c592a6b06b0a8086c32cf947830d8d200afd431a8f6f70c6f8297
|
3 |
+
size 275160933
|
gpt2_c679fa01f00dd6f584614c6d9784eb233b047283/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 896,
|
10 |
+
"d_model": 576,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 8,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 4,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_c679fa01f00dd6f584614c6d9784eb233b047283/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ad2cc5a9f7c872e107d4f4dc856f1137c86d29da1f02398bc2a9f0c3885e3759
|
3 |
+
size 275160933
|
gpt2_c76bdddb5cf59275711672daa5b8c70e6c78bf4e/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 896,
|
10 |
+
"d_model": 320,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 4,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 4,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_c76bdddb5cf59275711672daa5b8c70e6c78bf4e/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:11e78f94a56a38646f828b4745149ae528891969b412920eb7d76245cae47da9
|
3 |
+
size 275160933
|
gpt2_ddf63c1125f1fed5a7dd3537f640834187719996/config.json
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"activation_function": "gelu_new",
|
3 |
+
"arch_type": "gpt2",
|
4 |
+
"architectures": [
|
5 |
+
"GPT2LMHeadModel"
|
6 |
+
],
|
7 |
+
"attn_pdrop": 0.1,
|
8 |
+
"bos_token_id": 0,
|
9 |
+
"d_inner": 960,
|
10 |
+
"d_model": 384,
|
11 |
+
"dropatt": 0.0,
|
12 |
+
"embd_pdrop": 0.1,
|
13 |
+
"eos_token_id": 0,
|
14 |
+
"initializer_range": 0.02,
|
15 |
+
"layer_norm_epsilon": 1e-05,
|
16 |
+
"max_sequence_length": 1024,
|
17 |
+
"model_type": "gpt2",
|
18 |
+
"n_embd": 768,
|
19 |
+
"n_head": 4,
|
20 |
+
"n_inner": null,
|
21 |
+
"n_layer": 10,
|
22 |
+
"n_positions": 1024,
|
23 |
+
"reorder_and_upcast_attn": false,
|
24 |
+
"resid_pdrop": 0.1,
|
25 |
+
"scale_attn_by_inverse_layer_idx": false,
|
26 |
+
"scale_attn_weights": true,
|
27 |
+
"summary_activation": null,
|
28 |
+
"summary_first_dropout": 0.1,
|
29 |
+
"summary_proj_to_labels": true,
|
30 |
+
"summary_type": "cls_index",
|
31 |
+
"summary_use_proj": true,
|
32 |
+
"torch_dtype": "float32",
|
33 |
+
"transformers_version": "4.26.1",
|
34 |
+
"use_cache": true,
|
35 |
+
"vocab_size": 50257
|
36 |
+
}
|
gpt2_ddf63c1125f1fed5a7dd3537f640834187719996/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4db51b31f79a74a1bfe6f1601646fe5db575b6c44e2811cb427fa48a02938e00
|
3 |
+
size 451588685
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
special_tokens_map.json
ADDED
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token": "<|endoftext|>",
|
3 |
+
"eos_token": "<|endoftext|>",
|
4 |
+
"unk_token": "<|endoftext|>"
|
5 |
+
}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"add_prefix_space": false,
|
3 |
+
"bos_token": "<|endoftext|>",
|
4 |
+
"eos_token": "<|endoftext|>",
|
5 |
+
"model_max_length": 1024,
|
6 |
+
"name_or_path": "gpt2",
|
7 |
+
"special_tokens_map_file": null,
|
8 |
+
"tokenizer_class": "GPT2Tokenizer",
|
9 |
+
"unk_token": "<|endoftext|>"
|
10 |
+
}
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|