Joemgu committed on
Commit
c25b868
1 Parent(s): 03c0d05

Training in progress, step 200

Browse files
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "agemagician/mlong-t5-tglobal-base",
3
  "architectures": [
4
  "LongT5ForConditionalGeneration"
5
  ],
@@ -8,7 +8,7 @@
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
- "dropout_rate": 0.1,
12
  "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "gated-gelu",
@@ -27,22 +27,22 @@
27
  "pad_token_id": 0,
28
  "relative_attention_max_distance": 128,
29
  "relative_attention_num_buckets": 32,
30
- "tie_word_embeddings": false,
31
- "torch_dtype": "float32",
32
- "transformers_version": "4.30.0",
33
  "task_specific_params": {
34
  "summarization": {
35
  "early_stopping": true,
 
36
  "length_penalty": 2.0,
37
- "repetition_penalty": 3.5,
38
  "max_length": 256,
39
  "min_length": 16,
40
- "encoder_no_repeat_ngram_size": 5,
41
  "no_repeat_ngram_size": 4,
42
  "num_beams": 4,
43
- "prefix": "Write a title and summarize: "
 
44
  }
45
  },
46
- "use_cache": true,
 
 
 
47
  "vocab_size": 256384
48
  }
 
1
  {
2
+ "_name_or_path": "Joemgu/mlong-t5-base-sumstew",
3
  "architectures": [
4
  "LongT5ForConditionalGeneration"
5
  ],
 
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.0,
12
  "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "gated-gelu",
 
27
  "pad_token_id": 0,
28
  "relative_attention_max_distance": 128,
29
  "relative_attention_num_buckets": 32,
 
 
 
30
  "task_specific_params": {
31
  "summarization": {
32
  "early_stopping": true,
33
+ "encoder_no_repeat_ngram_size": 5,
34
  "length_penalty": 2.0,
 
35
  "max_length": 256,
36
  "min_length": 16,
 
37
  "no_repeat_ngram_size": 4,
38
  "num_beams": 4,
39
+ "prefix": "Write a title and summarize: ",
40
+ "repetition_penalty": 3.5
41
  }
42
  },
43
+ "tie_word_embeddings": false,
44
+ "torch_dtype": "float32",
45
+ "transformers_version": "4.30.0",
46
+ "use_cache": false,
47
  "vocab_size": 256384
48
  }
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "agemagician/mlong-t5-tglobal-base",
3
  "architectures": [
4
  "LongT5ForConditionalGeneration"
5
  ],
@@ -8,7 +8,7 @@
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
- "dropout_rate": 0.1,
12
  "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "gated-gelu",
@@ -27,6 +27,19 @@
27
  "pad_token_id": 0,
28
  "relative_attention_max_distance": 128,
29
  "relative_attention_num_buckets": 32,
 
 
 
 
 
 
 
 
 
 
 
 
 
30
  "tie_word_embeddings": false,
31
  "torch_dtype": "float32",
32
  "transformers_version": "4.30.0",
 
1
  {
2
+ "_name_or_path": "Joemgu/mlong-t5-base-sumstew",
3
  "architectures": [
4
  "LongT5ForConditionalGeneration"
5
  ],
 
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.0,
12
  "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "gated-gelu",
 
27
  "pad_token_id": 0,
28
  "relative_attention_max_distance": 128,
29
  "relative_attention_num_buckets": 32,
30
+ "task_specific_params": {
31
+ "summarization": {
32
+ "early_stopping": true,
33
+ "encoder_no_repeat_ngram_size": 5,
34
+ "length_penalty": 2.0,
35
+ "max_length": 256,
36
+ "min_length": 16,
37
+ "no_repeat_ngram_size": 4,
38
+ "num_beams": 4,
39
+ "prefix": "Write a title and summarize: ",
40
+ "repetition_penalty": 3.5
41
+ }
42
+ },
43
  "tie_word_embeddings": false,
44
  "torch_dtype": "float32",
45
  "transformers_version": "4.30.0",
last-checkpoint/generation_config.json CHANGED
@@ -1,5 +1,6 @@
1
  {
2
  "decoder_start_token_id": 0,
3
  "max_length": 1024,
 
4
  "transformers_version": "4.30.0"
5
  }
 
1
  {
2
  "decoder_start_token_id": 0,
3
  "max_length": 1024,
4
+ "num_beams": 4,
5
  "transformers_version": "4.30.0"
6
  }
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f6de8b06ed44d4963b25bb376812b173405edc863a23e8867e0f736341d74a8d
3
  size 4736616809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:295a5920bae53deab38e6f92397053ad981c5d87986421287daf12cbf976c28a
3
  size 4736616809
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02c85713dad1dcfeb5c32c03d2b6f448d20d550a87402f9d21be276100bce607
3
  size 2368281769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8519e140daa38e7801673b244b2c2508c35bb63f95898abecfb02f2ae1506410
3
  size 2368281769
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a2eb101bb6b101af129672a2bea24581932e36cd77fc392a7cda646daab49ff
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f9ca352cb22c2e874355bb7654391e036047a8bceae9ffa8161bf19e850e7704
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cabb275feefad232b9c910702270f8d2a4ae8df759c0f8aeaf1aba436940e944
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dfed3e8b1bba7802f62911f65b760f66c94bd2067f28ece04f4710d040c1fa11
3
  size 627
last-checkpoint/tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1aeab8f9f01e0f5c8f9b8e06e3c1546696dff62a47a6729a0f91aaa24c98da4
3
- size 16795620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:750ffbacc45c2f284f16da1d281fedfe2ed16f956306c0999ccaeb7b08554793
3
+ size 16780024
last-checkpoint/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359df1e59d8152b6ac8b970534543aecdaf2a516151d0c7925f6ee4b8bebd509
3
  size 5371
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cb1ee3100dd5731f643b0702779cb0f885a239198814a1b5fd38a1b3541b399
3
  size 5371
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:02c85713dad1dcfeb5c32c03d2b6f448d20d550a87402f9d21be276100bce607
3
  size 2368281769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8519e140daa38e7801673b244b2c2508c35bb63f95898abecfb02f2ae1506410
3
  size 2368281769
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1aeab8f9f01e0f5c8f9b8e06e3c1546696dff62a47a6729a0f91aaa24c98da4
3
- size 16795620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:750ffbacc45c2f284f16da1d281fedfe2ed16f956306c0999ccaeb7b08554793
3
+ size 16780024
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:359df1e59d8152b6ac8b970534543aecdaf2a516151d0c7925f6ee4b8bebd509
3
  size 5371
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cb1ee3100dd5731f643b0702779cb0f885a239198814a1b5fd38a1b3541b399
3
  size 5371