root
committed on
Commit
•
96c6b63
1
Parent(s):
c67b6e6
Fix d_model & d_ff
Browse files
- config.json +8 -13
- pytorch_model-00001-of-00008.bin +3 -0
- pytorch_model-00002-of-00008.bin +3 -0
- pytorch_model-00003-of-00008.bin +3 -0
- pytorch_model-00004-of-00008.bin +3 -0
- pytorch_model-00005-of-00008.bin +3 -0
- pytorch_model-00006-of-00008.bin +3 -0
- pytorch_model-00007-of-00008.bin +3 -0
- pytorch_model-00008-of-00008.bin +3 -0
- pytorch_model.bin.index.json +2 -2
config.json
CHANGED
@@ -1,35 +1,30 @@
|
|
1 |
{
|
2 |
-
"_name_or_path": "long-t5-tglobal-large",
|
3 |
"architectures": [
|
4 |
"LongT5ForConditionalGeneration"
|
5 |
],
|
6 |
-
"d_ff":
|
7 |
"d_kv": 64,
|
8 |
-
"d_model":
|
9 |
-
"
|
10 |
-
"dense_act_fn": "gelu_new",
|
11 |
"dropout_rate": 0.1,
|
12 |
-
"encoder_attention_type": "
|
13 |
"eos_token_id": 1,
|
14 |
-
"feed_forward_proj": "
|
15 |
"global_block_size": 16,
|
16 |
"initializer_factor": 1.0,
|
17 |
"is_encoder_decoder": true,
|
18 |
-
"is_gated_act":
|
19 |
"layer_norm_epsilon": 1e-06,
|
20 |
"local_radius": 127,
|
21 |
"model_type": "longt5",
|
22 |
-
"
|
23 |
-
"num_decoder_layers": 24,
|
24 |
"num_heads": 64,
|
25 |
"num_layers": 48,
|
26 |
-
"output_past": true,
|
27 |
"pad_token_id": 0,
|
28 |
"relative_attention_max_distance": 128,
|
29 |
"relative_attention_num_buckets": 32,
|
30 |
-
"tie_word_embeddings": false,
|
31 |
"torch_dtype": "float32",
|
32 |
-
"transformers_version": "4.
|
33 |
"use_cache": true,
|
34 |
"vocab_size": 32128
|
35 |
}
|
1 |
{
|
|
|
2 |
"architectures": [
|
3 |
"LongT5ForConditionalGeneration"
|
4 |
],
|
5 |
+
"d_ff": 10240,
|
6 |
"d_kv": 64,
|
7 |
+
"d_model": 4096,
|
8 |
+
"dense_act_fn": "relu",
|
|
|
9 |
"dropout_rate": 0.1,
|
10 |
+
"encoder_attention_type": "local",
|
11 |
"eos_token_id": 1,
|
12 |
+
"feed_forward_proj": "relu",
|
13 |
"global_block_size": 16,
|
14 |
"initializer_factor": 1.0,
|
15 |
"is_encoder_decoder": true,
|
16 |
+
"is_gated_act": false,
|
17 |
"layer_norm_epsilon": 1e-06,
|
18 |
"local_radius": 127,
|
19 |
"model_type": "longt5",
|
20 |
+
"num_decoder_layers": 48,
|
|
|
21 |
"num_heads": 64,
|
22 |
"num_layers": 48,
|
|
|
23 |
"pad_token_id": 0,
|
24 |
"relative_attention_max_distance": 128,
|
25 |
"relative_attention_num_buckets": 32,
|
|
|
26 |
"torch_dtype": "float32",
|
27 |
+
"transformers_version": "4.25.0.dev0",
|
28 |
"use_cache": true,
|
29 |
"vocab_size": 32128
|
30 |
}
|
pytorch_model-00001-of-00008.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a6ea314d9d639dd15280e0c8a805a357f6e46900ae599825439057c90496355
|
3 |
+
size 9418835479
|
pytorch_model-00002-of-00008.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d3e4c7367f05566ee45aad7cf690d6dc891be574b467e627d52662065f95ad20
|
3 |
+
size 9966253699
|
pytorch_model-00003-of-00008.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:38bb749ace5a16554c4a402ddcc79b3b75b72b473f04bfe2cf243c3f2b6a8ed5
|
3 |
+
size 9966253711
|
pytorch_model-00004-of-00008.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:360ee8f6eea8562796c8462262262db07a5019944bf8c22c567551609f36b618
|
3 |
+
size 9955796714
|
pytorch_model-00005-of-00008.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6d291ea24cc6a52606a4c968c6d8fbfc0b23c3d7b0505643b9726a0b066acd09
|
3 |
+
size 9999829704
|
pytorch_model-00006-of-00008.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aaa6112bda4901571cbe7fea4c0679606793a4cc9073383f00b214d43153d4d3
|
3 |
+
size 9999830697
|
pytorch_model-00007-of-00008.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e486b640f0ac3f22bd921506943fed9257bc78fd54cf38a22f7b9469fcb806d2
|
3 |
+
size 9999846391
|
pytorch_model-00008-of-00008.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cc7cf7b4ebb3a1efe9ce5a1bc6d07f672b97dc75c0b24012f4601adab0e526b1
|
3 |
+
size 3143808689
|
pytorch_model.bin.index.json
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e792ebbc02388bfa94c5bcb167e76cde3f7e76571aefd43688c2dde31818f5b
|
3 |
+
size 92758
|