root committed on
Commit
96c6b63
1 Parent(s): c67b6e6

Fix d_model & d_ff

Browse files
config.json CHANGED
@@ -1,35 +1,30 @@
1
  {
2
- "_name_or_path": "long-t5-tglobal-large",
3
  "architectures": [
4
  "LongT5ForConditionalGeneration"
5
  ],
6
- "d_ff": 5120,
7
  "d_kv": 64,
8
- "d_model": 1024,
9
- "decoder_start_token_id": 0,
10
- "dense_act_fn": "gelu_new",
11
  "dropout_rate": 0.1,
12
- "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
- "feed_forward_proj": "gated-gelu",
15
  "global_block_size": 16,
16
  "initializer_factor": 1.0,
17
  "is_encoder_decoder": true,
18
- "is_gated_act": true,
19
  "layer_norm_epsilon": 1e-06,
20
  "local_radius": 127,
21
  "model_type": "longt5",
22
- "n_positions": 4096,
23
- "num_decoder_layers": 24,
24
  "num_heads": 64,
25
  "num_layers": 48,
26
- "output_past": true,
27
  "pad_token_id": 0,
28
  "relative_attention_max_distance": 128,
29
  "relative_attention_num_buckets": 32,
30
- "tie_word_embeddings": false,
31
  "torch_dtype": "float32",
32
- "transformers_version": "4.23.1",
33
  "use_cache": true,
34
  "vocab_size": 32128
35
  }
1
  {
 
2
  "architectures": [
3
  "LongT5ForConditionalGeneration"
4
  ],
5
+ "d_ff": 10240,
6
  "d_kv": 64,
7
+ "d_model": 4096,
8
+ "dense_act_fn": "relu",
 
9
  "dropout_rate": 0.1,
10
+ "encoder_attention_type": "local",
11
  "eos_token_id": 1,
12
+ "feed_forward_proj": "relu",
13
  "global_block_size": 16,
14
  "initializer_factor": 1.0,
15
  "is_encoder_decoder": true,
16
+ "is_gated_act": false,
17
  "layer_norm_epsilon": 1e-06,
18
  "local_radius": 127,
19
  "model_type": "longt5",
20
+ "num_decoder_layers": 48,
 
21
  "num_heads": 64,
22
  "num_layers": 48,
 
23
  "pad_token_id": 0,
24
  "relative_attention_max_distance": 128,
25
  "relative_attention_num_buckets": 32,
 
26
  "torch_dtype": "float32",
27
+ "transformers_version": "4.25.0.dev0",
28
  "use_cache": true,
29
  "vocab_size": 32128
30
  }
pytorch_model-00001-of-00008.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a6ea314d9d639dd15280e0c8a805a357f6e46900ae599825439057c90496355
3
+ size 9418835479
pytorch_model-00002-of-00008.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3e4c7367f05566ee45aad7cf690d6dc891be574b467e627d52662065f95ad20
3
+ size 9966253699
pytorch_model-00003-of-00008.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38bb749ace5a16554c4a402ddcc79b3b75b72b473f04bfe2cf243c3f2b6a8ed5
3
+ size 9966253711
pytorch_model-00004-of-00008.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:360ee8f6eea8562796c8462262262db07a5019944bf8c22c567551609f36b618
3
+ size 9955796714
pytorch_model-00005-of-00008.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d291ea24cc6a52606a4c968c6d8fbfc0b23c3d7b0505643b9726a0b066acd09
3
+ size 9999829704
pytorch_model-00006-of-00008.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaa6112bda4901571cbe7fea4c0679606793a4cc9073383f00b214d43153d4d3
3
+ size 9999830697
pytorch_model-00007-of-00008.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e486b640f0ac3f22bd921506943fed9257bc78fd54cf38a22f7b9469fcb806d2
3
+ size 9999846391
pytorch_model-00008-of-00008.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc7cf7b4ebb3a1efe9ce5a1bc6d07f672b97dc75c0b24012f4601adab0e526b1
3
+ size 3143808689
pytorch_model.bin.index.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:12ca3616ff8422a5cbf510b62876f29315db066e1256801b5931a70cabc3b620
3
- size 79600
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e792ebbc02388bfa94c5bcb167e76cde3f7e76571aefd43688c2dde31818f5b
3
+ size 92758