Training in progress, step 200

Files changed (13) hide show

config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "agemagician/mlong-t5-tglobal-base",
   "architectures": [
     "LongT5ForConditionalGeneration"
   ],
@@ -8,7 +8,7 @@
   "d_model": 768,
   "decoder_start_token_id": 0,
   "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
   "encoder_attention_type": "transient-global",
   "eos_token_id": 1,
   "feed_forward_proj": "gated-gelu",
@@ -27,22 +27,22 @@
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
-  "tie_word_embeddings": false,
-  "torch_dtype": "float32",
-  "transformers_version": "4.30.0",
   "task_specific_params": {
     "summarization": {
       "early_stopping": true,
       "length_penalty": 2.0,
-      "repetition_penalty": 3.5,
       "max_length": 256,
       "min_length": 16,
-      "encoder_no_repeat_ngram_size": 5,
       "no_repeat_ngram_size": 4,
       "num_beams": 4,
-      "prefix": "Write a title and summarize: "
     }
   },
-  "use_cache": true,
   "vocab_size": 256384
 }

 {
+  "_name_or_path": "Joemgu/mlong-t5-base-sumstew",
   "architectures": [
     "LongT5ForConditionalGeneration"
   ],
   "d_model": 768,
   "decoder_start_token_id": 0,
   "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.0,
   "encoder_attention_type": "transient-global",
   "eos_token_id": 1,
   "feed_forward_proj": "gated-gelu",
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
   "task_specific_params": {
     "summarization": {
       "early_stopping": true,
+      "encoder_no_repeat_ngram_size": 5,
       "length_penalty": 2.0,
       "max_length": 256,
       "min_length": 16,
       "no_repeat_ngram_size": 4,
       "num_beams": 4,
+      "prefix": "Write a title and summarize: ",
+      "repetition_penalty": 3.5
     }
   },
+  "tie_word_embeddings": false,
+  "torch_dtype": "float32",
+  "transformers_version": "4.30.0",
+  "use_cache": false,
   "vocab_size": 256384
 }

last-checkpoint/config.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-  "_name_or_path": "agemagician/mlong-t5-tglobal-base",
   "architectures": [
     "LongT5ForConditionalGeneration"
   ],
@@ -8,7 +8,7 @@
   "d_model": 768,
   "decoder_start_token_id": 0,
   "dense_act_fn": "gelu_new",
-  "dropout_rate": 0.1,
   "encoder_attention_type": "transient-global",
   "eos_token_id": 1,
   "feed_forward_proj": "gated-gelu",
@@ -27,6 +27,19 @@
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.30.0",

 {
+  "_name_or_path": "Joemgu/mlong-t5-base-sumstew",
   "architectures": [
     "LongT5ForConditionalGeneration"
   ],
   "d_model": 768,
   "decoder_start_token_id": 0,
   "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.0,
   "encoder_attention_type": "transient-global",
   "eos_token_id": 1,
   "feed_forward_proj": "gated-gelu",
   "pad_token_id": 0,
   "relative_attention_max_distance": 128,
   "relative_attention_num_buckets": 32,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "encoder_no_repeat_ngram_size": 5,
+      "length_penalty": 2.0,
+      "max_length": 256,
+      "min_length": 16,
+      "no_repeat_ngram_size": 4,
+      "num_beams": 4,
+      "prefix": "Write a title and summarize: ",
+      "repetition_penalty": 3.5
+    }
+  },
   "tie_word_embeddings": false,
   "torch_dtype": "float32",
   "transformers_version": "4.30.0",

last-checkpoint/generation_config.json CHANGED Viewed

@@ -1,5 +1,6 @@
 {
   "decoder_start_token_id": 0,
   "max_length": 1024,
   "transformers_version": "4.30.0"
 }

 {
   "decoder_start_token_id": 0,
   "max_length": 1024,
+  "num_beams": 4,
   "transformers_version": "4.30.0"
 }

last-checkpoint/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f6de8b06ed44d4963b25bb376812b173405edc863a23e8867e0f736341d74a8d
 size 4736616809

 version https://git-lfs.github.com/spec/v1
+oid sha256:295a5920bae53deab38e6f92397053ad981c5d87986421287daf12cbf976c28a
 size 4736616809

last-checkpoint/pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02c85713dad1dcfeb5c32c03d2b6f448d20d550a87402f9d21be276100bce607
 size 2368281769

 version https://git-lfs.github.com/spec/v1
+oid sha256:8519e140daa38e7801673b244b2c2508c35bb63f95898abecfb02f2ae1506410
 size 2368281769

last-checkpoint/rng_state.pth CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0a2eb101bb6b101af129672a2bea24581932e36cd77fc392a7cda646daab49ff
 size 14575

 version https://git-lfs.github.com/spec/v1
+oid sha256:f9ca352cb22c2e874355bb7654391e036047a8bceae9ffa8161bf19e850e7704
 size 14575

last-checkpoint/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cabb275feefad232b9c910702270f8d2a4ae8df759c0f8aeaf1aba436940e944
 size 627

 version https://git-lfs.github.com/spec/v1
+oid sha256:dfed3e8b1bba7802f62911f65b760f66c94bd2067f28ece04f4710d040c1fa11
 size 627

last-checkpoint/tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1aeab8f9f01e0f5c8f9b8e06e3c1546696dff62a47a6729a0f91aaa24c98da4
-size 16795620

 version https://git-lfs.github.com/spec/v1
+oid sha256:750ffbacc45c2f284f16da1d281fedfe2ed16f956306c0999ccaeb7b08554793
+size 16780024

last-checkpoint/trainer_state.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

last-checkpoint/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:359df1e59d8152b6ac8b970534543aecdaf2a516151d0c7925f6ee4b8bebd509
 size 5371

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cb1ee3100dd5731f643b0702779cb0f885a239198814a1b5fd38a1b3541b399
 size 5371

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:02c85713dad1dcfeb5c32c03d2b6f448d20d550a87402f9d21be276100bce607
 size 2368281769

 version https://git-lfs.github.com/spec/v1
+oid sha256:8519e140daa38e7801673b244b2c2508c35bb63f95898abecfb02f2ae1506410
 size 2368281769

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c1aeab8f9f01e0f5c8f9b8e06e3c1546696dff62a47a6729a0f91aaa24c98da4
-size 16795620

 version https://git-lfs.github.com/spec/v1
+oid sha256:750ffbacc45c2f284f16da1d281fedfe2ed16f956306c0999ccaeb7b08554793
+size 16780024

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:359df1e59d8152b6ac8b970534543aecdaf2a516151d0c7925f6ee4b8bebd509
 size 5371

 version https://git-lfs.github.com/spec/v1
+oid sha256:0cb1ee3100dd5731f643b0702779cb0f885a239198814a1b5fd38a1b3541b399
 size 5371