Amin committed on
Commit 698009f
1 Parent(s): be614e1

First Model version

config.json ADDED
@@ -0,0 +1,70 @@
+ {
+   "_name_or_path": "facebook/bart-large-cnn",
+   "_num_labels": 3,
+   "activation_dropout": 0.0,
+   "activation_function": "gelu",
+   "add_final_layer_norm": false,
+   "architectures": [
+     "BartForConditionalGeneration"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 0,
+   "classif_dropout": 0.0,
+   "classifier_dropout": 0.0,
+   "d_model": 1024,
+   "decoder_attention_heads": 16,
+   "decoder_ffn_dim": 4096,
+   "decoder_layerdrop": 0.0,
+   "decoder_layers": 12,
+   "decoder_start_token_id": 2,
+   "dropout": 0.1,
+   "early_stopping": true,
+   "encoder_attention_heads": 16,
+   "encoder_ffn_dim": 4096,
+   "encoder_layerdrop": 0.0,
+   "encoder_layers": 12,
+   "eos_token_id": 2,
+   "force_bos_token_to_be_generated": true,
+   "forced_bos_token_id": 0,
+   "forced_eos_token_id": 2,
+   "gradient_checkpointing": false,
+   "id2label": {
+     "0": "LABEL_0",
+     "1": "LABEL_1",
+     "2": "LABEL_2"
+   },
+   "init_std": 0.02,
+   "is_encoder_decoder": true,
+   "label2id": {
+     "LABEL_0": 0,
+     "LABEL_1": 1,
+     "LABEL_2": 2
+   },
+   "length_penalty": 2.0,
+   "max_length": 142,
+   "max_position_embeddings": 1024,
+   "min_length": 56,
+   "model_type": "bart",
+   "no_repeat_ngram_size": 3,
+   "normalize_before": false,
+   "num_beams": 4,
+   "num_hidden_layers": 12,
+   "output_past": true,
+   "pad_token_id": 1,
+   "prefix": " ",
+   "scale_embedding": false,
+   "task_specific_params": {
+     "summarization": {
+       "early_stopping": true,
+       "length_penalty": 2.0,
+       "max_length": 142,
+       "min_length": 56,
+       "no_repeat_ngram_size": 3,
+       "num_beams": 4
+     }
+   },
+   "torch_dtype": "float32",
+   "transformers_version": "4.21.2",
+   "use_cache": true,
+   "vocab_size": 50264
+ }
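
The configuration above keeps facebook/bart-large-cnn's architecture and its summarization generation defaults (beam search with 4 beams, length penalty 2.0, summaries of 56-142 tokens). Below is a minimal inference sketch using the transformers library; the repository id "your-username/bart-large-cnn-newsroom" is a placeholder for wherever this commit is hosted, not the real repo id.

```python
# Minimal sketch: load this fine-tuned checkpoint and summarize one article.
# The repo id is a placeholder (assumption); a local checkpoint directory works too.
from transformers import BartForConditionalGeneration, BartTokenizer

repo_id = "your-username/bart-large-cnn-newsroom"  # placeholder
tokenizer = BartTokenizer.from_pretrained(repo_id)
model = BartForConditionalGeneration.from_pretrained(repo_id)

article = "Long news article text goes here ..."
inputs = tokenizer(article, max_length=1024, truncation=True, return_tensors="pt")

# These settings mirror the task_specific_params.summarization block in config.json.
summary_ids = model.generate(
    **inputs,
    num_beams=4,
    length_penalty=2.0,
    max_length=142,
    min_length=56,
    no_repeat_ngram_size=3,
    early_stopping=True,
)
print(tokenizer.decode(summary_ids[0], skip_special_tokens=True))
```
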
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
optimizer.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3eb2903707b9b94f9de0e5ef0e3730cdb8f66e4fb68f217b4a35dd3090c63e61
+ size 3250622363
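
optimizer.pt and the other binary files below are stored through Git LFS, so the diff only shows the three-line pointer (spec version, SHA-256 object id, and size in bytes) rather than the 3.25 GB optimizer state itself. A tiny sketch of reading such a pointer file, assuming the checkout still holds the pointer rather than the resolved binary:

```python
# Sketch: parse a Git LFS pointer file into its fields (version, oid, size).
def read_lfs_pointer(path: str) -> dict:
    fields = {}
    with open(path, "r", encoding="utf-8") as fh:
        for line in fh:
            if not line.strip():
                continue
            key, _, value = line.strip().partition(" ")
            fields[key] = value
    return fields

pointer = read_lfs_pointer("optimizer.pt")  # only valid on the pointer, not the fetched binary
print(pointer["oid"], int(pointer["size"]))
```
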
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5833792f36db034a96aabad34178464b3fd9c11a29f3eb5813b154a69bb5aeed
+ size 1625533697
rng_state.pth ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e46a1afb0a9e4ba18a1c552f8f8779cec2c4380842e661c543a50bc20512e2ca
+ size 14503
scaler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:42d147001394f6b9b8f8a05883d1e57c0aa94311bf0ff4c5ed15f2c6ebf2407f
+ size 559
scheduler.pt ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3c46834bee64affd7ab8534e1575af8f9280c566675afe308fe267284f91e6cd
+ size 623
special_tokens_map.json ADDED
@@ -0,0 +1,15 @@
+ {
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "mask_token": {
+     "content": "<mask>",
+     "lstrip": true,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "unk_token": "<unk>"
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
+ {
+   "add_prefix_space": false,
+   "bos_token": "<s>",
+   "cls_token": "<s>",
+   "eos_token": "</s>",
+   "errors": "replace",
+   "mask_token": "<mask>",
+   "model_max_length": 1024,
+   "name_or_path": "facebook/bart-large-cnn",
+   "pad_token": "<pad>",
+   "sep_token": "</s>",
+   "special_tokens_map_file": null,
+   "tokenizer_class": "BartTokenizer",
+   "trim_offsets": true,
+   "unk_token": "<unk>"
+ }
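
tokenizer_config.json and special_tokens_map.json above, together with merges.txt, vocab.json and tokenizer.json (whose diffs are too large to render), reconstruct the standard BART byte-level BPE tokenizer. A quick sanity check, again with a placeholder repo id:

```python
# Sketch: load the tokenizer from this repository (placeholder id) and confirm
# the special tokens and length limit match the JSON files in this commit.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("your-username/bart-large-cnn-newsroom")
assert tokenizer.bos_token == "<s>" and tokenizer.eos_token == "</s>"
assert tokenizer.mask_token == "<mask>" and tokenizer.pad_token == "<pad>"
assert tokenizer.model_max_length == 1024
print(tokenizer)  # AutoTokenizer returns the fast variant unless use_fast=False
```
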
trainer_state.json ADDED
@@ -0,0 +1,2402 @@
1
+ {
2
+ "best_metric": 2.0502805709838867,
3
+ "best_model_checkpoint": "results/models/bart-large-cnn-NewsRoomSmall/checkpoint-48701",
4
+ "epoch": 4.0,
5
+ "global_step": 194804,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.01,
12
+ "learning_rate": 1.9949077021005728e-05,
13
+ "loss": 2.0989,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.02,
18
+ "learning_rate": 1.9897743372826023e-05,
19
+ "loss": 2.0245,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.03,
24
+ "learning_rate": 1.9846409724646314e-05,
25
+ "loss": 1.9898,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.04,
30
+ "learning_rate": 1.9795076076466605e-05,
31
+ "loss": 1.9769,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.05,
36
+ "learning_rate": 1.9743742428286896e-05,
37
+ "loss": 1.9832,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.06,
42
+ "learning_rate": 1.9692408780107187e-05,
43
+ "loss": 1.9371,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.07,
48
+ "learning_rate": 1.9641177799223837e-05,
49
+ "loss": 1.9403,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.08,
54
+ "learning_rate": 1.958984415104413e-05,
55
+ "loss": 1.9412,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.09,
60
+ "learning_rate": 1.953851050286442e-05,
61
+ "loss": 1.9081,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.1,
66
+ "learning_rate": 1.948717685468471e-05,
67
+ "loss": 1.904,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.11,
72
+ "learning_rate": 1.9435843206505e-05,
73
+ "loss": 1.8917,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.12,
78
+ "learning_rate": 1.9384612225621652e-05,
79
+ "loss": 1.8901,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.13,
84
+ "learning_rate": 1.9333278577441943e-05,
85
+ "loss": 1.9037,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.14,
90
+ "learning_rate": 1.9281944929262234e-05,
91
+ "loss": 1.9027,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.15,
96
+ "learning_rate": 1.9230611281082525e-05,
97
+ "loss": 1.8839,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.16,
102
+ "learning_rate": 1.9179277632902817e-05,
103
+ "loss": 1.8959,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.17,
108
+ "learning_rate": 1.9128046652019467e-05,
109
+ "loss": 1.9039,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.18,
114
+ "learning_rate": 1.9076713003839758e-05,
115
+ "loss": 1.874,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.2,
120
+ "learning_rate": 1.902537935566005e-05,
121
+ "loss": 1.8835,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 0.21,
126
+ "learning_rate": 1.897404570748034e-05,
127
+ "loss": 1.8617,
128
+ "step": 10000
129
+ },
130
+ {
131
+ "epoch": 0.22,
132
+ "learning_rate": 1.892271205930063e-05,
133
+ "loss": 1.8594,
134
+ "step": 10500
135
+ },
136
+ {
137
+ "epoch": 0.23,
138
+ "learning_rate": 1.8871378411120926e-05,
139
+ "loss": 1.8483,
140
+ "step": 11000
141
+ },
142
+ {
143
+ "epoch": 0.24,
144
+ "learning_rate": 1.8820044762941213e-05,
145
+ "loss": 1.873,
146
+ "step": 11500
147
+ },
148
+ {
149
+ "epoch": 0.25,
150
+ "learning_rate": 1.8768813782057864e-05,
151
+ "loss": 1.8425,
152
+ "step": 12000
153
+ },
154
+ {
155
+ "epoch": 0.26,
156
+ "learning_rate": 1.8717480133878155e-05,
157
+ "loss": 1.834,
158
+ "step": 12500
159
+ },
160
+ {
161
+ "epoch": 0.27,
162
+ "learning_rate": 1.8666146485698446e-05,
163
+ "loss": 1.8399,
164
+ "step": 13000
165
+ },
166
+ {
167
+ "epoch": 0.28,
168
+ "learning_rate": 1.8614812837518737e-05,
169
+ "loss": 1.8352,
170
+ "step": 13500
171
+ },
172
+ {
173
+ "epoch": 0.29,
174
+ "learning_rate": 1.856347918933903e-05,
175
+ "loss": 1.8401,
176
+ "step": 14000
177
+ },
178
+ {
179
+ "epoch": 0.3,
180
+ "learning_rate": 1.851214554115932e-05,
181
+ "loss": 1.8416,
182
+ "step": 14500
183
+ },
184
+ {
185
+ "epoch": 0.31,
186
+ "learning_rate": 1.846081189297961e-05,
187
+ "loss": 1.8486,
188
+ "step": 15000
189
+ },
190
+ {
191
+ "epoch": 0.32,
192
+ "learning_rate": 1.8409478244799905e-05,
193
+ "loss": 1.8383,
194
+ "step": 15500
195
+ },
196
+ {
197
+ "epoch": 0.33,
198
+ "learning_rate": 1.8358247263916552e-05,
199
+ "loss": 1.8253,
200
+ "step": 16000
201
+ },
202
+ {
203
+ "epoch": 0.34,
204
+ "learning_rate": 1.8306913615736847e-05,
205
+ "loss": 1.8373,
206
+ "step": 16500
207
+ },
208
+ {
209
+ "epoch": 0.35,
210
+ "learning_rate": 1.8255579967557134e-05,
211
+ "loss": 1.8161,
212
+ "step": 17000
213
+ },
214
+ {
215
+ "epoch": 0.36,
216
+ "learning_rate": 1.820424631937743e-05,
217
+ "loss": 1.8242,
218
+ "step": 17500
219
+ },
220
+ {
221
+ "epoch": 0.37,
222
+ "learning_rate": 1.8152912671197716e-05,
223
+ "loss": 1.8319,
224
+ "step": 18000
225
+ },
226
+ {
227
+ "epoch": 0.38,
228
+ "learning_rate": 1.810157902301801e-05,
229
+ "loss": 1.8144,
230
+ "step": 18500
231
+ },
232
+ {
233
+ "epoch": 0.39,
234
+ "learning_rate": 1.80502453748383e-05,
235
+ "loss": 1.8316,
236
+ "step": 19000
237
+ },
238
+ {
239
+ "epoch": 0.4,
240
+ "learning_rate": 1.7998911726658593e-05,
241
+ "loss": 1.8223,
242
+ "step": 19500
243
+ },
244
+ {
245
+ "epoch": 0.41,
246
+ "learning_rate": 1.7947680745775244e-05,
247
+ "loss": 1.8062,
248
+ "step": 20000
249
+ },
250
+ {
251
+ "epoch": 0.42,
252
+ "learning_rate": 1.789644976489189e-05,
253
+ "loss": 1.8184,
254
+ "step": 20500
255
+ },
256
+ {
257
+ "epoch": 0.43,
258
+ "learning_rate": 1.7845116116712185e-05,
259
+ "loss": 1.8149,
260
+ "step": 21000
261
+ },
262
+ {
263
+ "epoch": 0.44,
264
+ "learning_rate": 1.7793782468532473e-05,
265
+ "loss": 1.8169,
266
+ "step": 21500
267
+ },
268
+ {
269
+ "epoch": 0.45,
270
+ "learning_rate": 1.7742448820352767e-05,
271
+ "loss": 1.8133,
272
+ "step": 22000
273
+ },
274
+ {
275
+ "epoch": 0.46,
276
+ "learning_rate": 1.7691115172173055e-05,
277
+ "loss": 1.818,
278
+ "step": 22500
279
+ },
280
+ {
281
+ "epoch": 0.47,
282
+ "learning_rate": 1.763988419128971e-05,
283
+ "loss": 1.8213,
284
+ "step": 23000
285
+ },
286
+ {
287
+ "epoch": 0.48,
288
+ "learning_rate": 1.758855054311e-05,
289
+ "loss": 1.8048,
290
+ "step": 23500
291
+ },
292
+ {
293
+ "epoch": 0.49,
294
+ "learning_rate": 1.753721689493029e-05,
295
+ "loss": 1.7975,
296
+ "step": 24000
297
+ },
298
+ {
299
+ "epoch": 0.5,
300
+ "learning_rate": 1.7485883246750582e-05,
301
+ "loss": 1.8023,
302
+ "step": 24500
303
+ },
304
+ {
305
+ "epoch": 0.51,
306
+ "learning_rate": 1.743465226586723e-05,
307
+ "loss": 1.8034,
308
+ "step": 25000
309
+ },
310
+ {
311
+ "epoch": 0.52,
312
+ "learning_rate": 1.7383318617687524e-05,
313
+ "loss": 1.8102,
314
+ "step": 25500
315
+ },
316
+ {
317
+ "epoch": 0.53,
318
+ "learning_rate": 1.7331984969507815e-05,
319
+ "loss": 1.7886,
320
+ "step": 26000
321
+ },
322
+ {
323
+ "epoch": 0.54,
324
+ "learning_rate": 1.7280651321328106e-05,
325
+ "loss": 1.798,
326
+ "step": 26500
327
+ },
328
+ {
329
+ "epoch": 0.55,
330
+ "learning_rate": 1.7229420340444757e-05,
331
+ "loss": 1.8022,
332
+ "step": 27000
333
+ },
334
+ {
335
+ "epoch": 0.56,
336
+ "learning_rate": 1.7178086692265048e-05,
337
+ "loss": 1.7992,
338
+ "step": 27500
339
+ },
340
+ {
341
+ "epoch": 0.57,
342
+ "learning_rate": 1.712675304408534e-05,
343
+ "loss": 1.7915,
344
+ "step": 28000
345
+ },
346
+ {
347
+ "epoch": 0.59,
348
+ "learning_rate": 1.707541939590563e-05,
349
+ "loss": 1.7804,
350
+ "step": 28500
351
+ },
352
+ {
353
+ "epoch": 0.6,
354
+ "learning_rate": 1.702408574772592e-05,
355
+ "loss": 1.7699,
356
+ "step": 29000
357
+ },
358
+ {
359
+ "epoch": 0.61,
360
+ "learning_rate": 1.6972854766842572e-05,
361
+ "loss": 1.7826,
362
+ "step": 29500
363
+ },
364
+ {
365
+ "epoch": 0.62,
366
+ "learning_rate": 1.6921623785959223e-05,
367
+ "loss": 1.7691,
368
+ "step": 30000
369
+ },
370
+ {
371
+ "epoch": 0.63,
372
+ "learning_rate": 1.6870290137779514e-05,
373
+ "loss": 1.7945,
374
+ "step": 30500
375
+ },
376
+ {
377
+ "epoch": 0.64,
378
+ "learning_rate": 1.6818956489599805e-05,
379
+ "loss": 1.7861,
380
+ "step": 31000
381
+ },
382
+ {
383
+ "epoch": 0.65,
384
+ "learning_rate": 1.6767622841420096e-05,
385
+ "loss": 1.7607,
386
+ "step": 31500
387
+ },
388
+ {
389
+ "epoch": 0.66,
390
+ "learning_rate": 1.6716289193240387e-05,
391
+ "loss": 1.7674,
392
+ "step": 32000
393
+ },
394
+ {
395
+ "epoch": 0.67,
396
+ "learning_rate": 1.6664955545060678e-05,
397
+ "loss": 1.7817,
398
+ "step": 32500
399
+ },
400
+ {
401
+ "epoch": 0.68,
402
+ "learning_rate": 1.661362189688097e-05,
403
+ "loss": 1.7751,
404
+ "step": 33000
405
+ },
406
+ {
407
+ "epoch": 0.69,
408
+ "learning_rate": 1.656228824870126e-05,
409
+ "loss": 1.7648,
410
+ "step": 33500
411
+ },
412
+ {
413
+ "epoch": 0.7,
414
+ "learning_rate": 1.651095460052155e-05,
415
+ "loss": 1.7611,
416
+ "step": 34000
417
+ },
418
+ {
419
+ "epoch": 0.71,
420
+ "learning_rate": 1.6459620952341842e-05,
421
+ "loss": 1.7567,
422
+ "step": 34500
423
+ },
424
+ {
425
+ "epoch": 0.72,
426
+ "learning_rate": 1.6408389971458493e-05,
427
+ "loss": 1.7598,
428
+ "step": 35000
429
+ },
430
+ {
431
+ "epoch": 0.73,
432
+ "learning_rate": 1.6357056323278784e-05,
433
+ "loss": 1.7591,
434
+ "step": 35500
435
+ },
436
+ {
437
+ "epoch": 0.74,
438
+ "learning_rate": 1.6305722675099075e-05,
439
+ "loss": 1.7789,
440
+ "step": 36000
441
+ },
442
+ {
443
+ "epoch": 0.75,
444
+ "learning_rate": 1.6254491694215725e-05,
445
+ "loss": 1.7779,
446
+ "step": 36500
447
+ },
448
+ {
449
+ "epoch": 0.76,
450
+ "learning_rate": 1.6203158046036016e-05,
451
+ "loss": 1.7388,
452
+ "step": 37000
453
+ },
454
+ {
455
+ "epoch": 0.77,
456
+ "learning_rate": 1.6151824397856308e-05,
457
+ "loss": 1.7636,
458
+ "step": 37500
459
+ },
460
+ {
461
+ "epoch": 0.78,
462
+ "learning_rate": 1.61004907496766e-05,
463
+ "loss": 1.7692,
464
+ "step": 38000
465
+ },
466
+ {
467
+ "epoch": 0.79,
468
+ "learning_rate": 1.604915710149689e-05,
469
+ "loss": 1.7595,
470
+ "step": 38500
471
+ },
472
+ {
473
+ "epoch": 0.8,
474
+ "learning_rate": 1.599782345331718e-05,
475
+ "loss": 1.7705,
476
+ "step": 39000
477
+ },
478
+ {
479
+ "epoch": 0.81,
480
+ "learning_rate": 1.594648980513747e-05,
481
+ "loss": 1.7357,
482
+ "step": 39500
483
+ },
484
+ {
485
+ "epoch": 0.82,
486
+ "learning_rate": 1.5895156156957766e-05,
487
+ "loss": 1.7714,
488
+ "step": 40000
489
+ },
490
+ {
491
+ "epoch": 0.83,
492
+ "learning_rate": 1.5843822508778054e-05,
493
+ "loss": 1.7471,
494
+ "step": 40500
495
+ },
496
+ {
497
+ "epoch": 0.84,
498
+ "learning_rate": 1.5792591527894704e-05,
499
+ "loss": 1.7513,
500
+ "step": 41000
501
+ },
502
+ {
503
+ "epoch": 0.85,
504
+ "learning_rate": 1.5741257879715e-05,
505
+ "loss": 1.7651,
506
+ "step": 41500
507
+ },
508
+ {
509
+ "epoch": 0.86,
510
+ "learning_rate": 1.5689924231535287e-05,
511
+ "loss": 1.7782,
512
+ "step": 42000
513
+ },
514
+ {
515
+ "epoch": 0.87,
516
+ "learning_rate": 1.563859058335558e-05,
517
+ "loss": 1.7573,
518
+ "step": 42500
519
+ },
520
+ {
521
+ "epoch": 0.88,
522
+ "learning_rate": 1.558725693517587e-05,
523
+ "loss": 1.7511,
524
+ "step": 43000
525
+ },
526
+ {
527
+ "epoch": 0.89,
528
+ "learning_rate": 1.5535923286996163e-05,
529
+ "loss": 1.7535,
530
+ "step": 43500
531
+ },
532
+ {
533
+ "epoch": 0.9,
534
+ "learning_rate": 1.548458963881645e-05,
535
+ "loss": 1.7525,
536
+ "step": 44000
537
+ },
538
+ {
539
+ "epoch": 0.91,
540
+ "learning_rate": 1.5433255990636745e-05,
541
+ "loss": 1.7629,
542
+ "step": 44500
543
+ },
544
+ {
545
+ "epoch": 0.92,
546
+ "learning_rate": 1.5382025009753392e-05,
547
+ "loss": 1.7534,
548
+ "step": 45000
549
+ },
550
+ {
551
+ "epoch": 0.93,
552
+ "learning_rate": 1.5330691361573687e-05,
553
+ "loss": 1.7444,
554
+ "step": 45500
555
+ },
556
+ {
557
+ "epoch": 0.94,
558
+ "learning_rate": 1.5279357713393978e-05,
559
+ "loss": 1.7479,
560
+ "step": 46000
561
+ },
562
+ {
563
+ "epoch": 0.95,
564
+ "learning_rate": 1.5228024065214269e-05,
565
+ "loss": 1.7384,
566
+ "step": 46500
567
+ },
568
+ {
569
+ "epoch": 0.97,
570
+ "learning_rate": 1.5176690417034558e-05,
571
+ "loss": 1.7526,
572
+ "step": 47000
573
+ },
574
+ {
575
+ "epoch": 0.98,
576
+ "learning_rate": 1.5125459436151209e-05,
577
+ "loss": 1.7422,
578
+ "step": 47500
579
+ },
580
+ {
581
+ "epoch": 0.99,
582
+ "learning_rate": 1.50741257879715e-05,
583
+ "loss": 1.7431,
584
+ "step": 48000
585
+ },
586
+ {
587
+ "epoch": 1.0,
588
+ "learning_rate": 1.5022792139791791e-05,
589
+ "loss": 1.7497,
590
+ "step": 48500
591
+ },
592
+ {
593
+ "epoch": 1.0,
594
+ "eval_gen_len": 58.64,
595
+ "eval_loss": 2.0502805709838867,
596
+ "eval_rouge1": 22.0185,
597
+ "eval_rouge2": 9.9661,
598
+ "eval_rougeL": 19.2037,
599
+ "eval_rougeLsum": 19.7843,
600
+ "eval_runtime": 17.9636,
601
+ "eval_samples_per_second": 5.567,
602
+ "eval_steps_per_second": 0.39,
603
+ "step": 48701
604
+ },
605
+ {
606
+ "epoch": 1.01,
607
+ "learning_rate": 1.4971458491612084e-05,
608
+ "loss": 1.6028,
609
+ "step": 49000
610
+ },
611
+ {
612
+ "epoch": 1.02,
613
+ "learning_rate": 1.4920124843432375e-05,
614
+ "loss": 1.4874,
615
+ "step": 49500
616
+ },
617
+ {
618
+ "epoch": 1.03,
619
+ "learning_rate": 1.4868791195252666e-05,
620
+ "loss": 1.5065,
621
+ "step": 50000
622
+ },
623
+ {
624
+ "epoch": 1.04,
625
+ "learning_rate": 1.4817560214369315e-05,
626
+ "loss": 1.4904,
627
+ "step": 50500
628
+ },
629
+ {
630
+ "epoch": 1.05,
631
+ "learning_rate": 1.4766226566189608e-05,
632
+ "loss": 1.505,
633
+ "step": 51000
634
+ },
635
+ {
636
+ "epoch": 1.06,
637
+ "learning_rate": 1.4714892918009897e-05,
638
+ "loss": 1.5026,
639
+ "step": 51500
640
+ },
641
+ {
642
+ "epoch": 1.07,
643
+ "learning_rate": 1.466355926983019e-05,
644
+ "loss": 1.5026,
645
+ "step": 52000
646
+ },
647
+ {
648
+ "epoch": 1.08,
649
+ "learning_rate": 1.4612225621650479e-05,
650
+ "loss": 1.5111,
651
+ "step": 52500
652
+ },
653
+ {
654
+ "epoch": 1.09,
655
+ "learning_rate": 1.4560891973470772e-05,
656
+ "loss": 1.5014,
657
+ "step": 53000
658
+ },
659
+ {
660
+ "epoch": 1.1,
661
+ "learning_rate": 1.4509558325291063e-05,
662
+ "loss": 1.502,
663
+ "step": 53500
664
+ },
665
+ {
666
+ "epoch": 1.11,
667
+ "learning_rate": 1.4458224677111354e-05,
668
+ "loss": 1.4973,
669
+ "step": 54000
670
+ },
671
+ {
672
+ "epoch": 1.12,
673
+ "learning_rate": 1.4406993696228005e-05,
674
+ "loss": 1.5042,
675
+ "step": 54500
676
+ },
677
+ {
678
+ "epoch": 1.13,
679
+ "learning_rate": 1.4355660048048297e-05,
680
+ "loss": 1.4914,
681
+ "step": 55000
682
+ },
683
+ {
684
+ "epoch": 1.14,
685
+ "learning_rate": 1.4304326399868587e-05,
686
+ "loss": 1.5026,
687
+ "step": 55500
688
+ },
689
+ {
690
+ "epoch": 1.15,
691
+ "learning_rate": 1.425299275168888e-05,
692
+ "loss": 1.4999,
693
+ "step": 56000
694
+ },
695
+ {
696
+ "epoch": 1.16,
697
+ "learning_rate": 1.4201761770805528e-05,
698
+ "loss": 1.5077,
699
+ "step": 56500
700
+ },
701
+ {
702
+ "epoch": 1.17,
703
+ "learning_rate": 1.415042812262582e-05,
704
+ "loss": 1.4962,
705
+ "step": 57000
706
+ },
707
+ {
708
+ "epoch": 1.18,
709
+ "learning_rate": 1.409909447444611e-05,
710
+ "loss": 1.5074,
711
+ "step": 57500
712
+ },
713
+ {
714
+ "epoch": 1.19,
715
+ "learning_rate": 1.4047760826266402e-05,
716
+ "loss": 1.5008,
717
+ "step": 58000
718
+ },
719
+ {
720
+ "epoch": 1.2,
721
+ "learning_rate": 1.3996529845383054e-05,
722
+ "loss": 1.5076,
723
+ "step": 58500
724
+ },
725
+ {
726
+ "epoch": 1.21,
727
+ "learning_rate": 1.3945196197203343e-05,
728
+ "loss": 1.5088,
729
+ "step": 59000
730
+ },
731
+ {
732
+ "epoch": 1.22,
733
+ "learning_rate": 1.3893862549023636e-05,
734
+ "loss": 1.4962,
735
+ "step": 59500
736
+ },
737
+ {
738
+ "epoch": 1.23,
739
+ "learning_rate": 1.3842528900843925e-05,
740
+ "loss": 1.4917,
741
+ "step": 60000
742
+ },
743
+ {
744
+ "epoch": 1.24,
745
+ "learning_rate": 1.3791195252664218e-05,
746
+ "loss": 1.4992,
747
+ "step": 60500
748
+ },
749
+ {
750
+ "epoch": 1.25,
751
+ "learning_rate": 1.3739964271780867e-05,
752
+ "loss": 1.4963,
753
+ "step": 61000
754
+ },
755
+ {
756
+ "epoch": 1.26,
757
+ "learning_rate": 1.368863062360116e-05,
758
+ "loss": 1.5008,
759
+ "step": 61500
760
+ },
761
+ {
762
+ "epoch": 1.27,
763
+ "learning_rate": 1.3637296975421451e-05,
764
+ "loss": 1.5009,
765
+ "step": 62000
766
+ },
767
+ {
768
+ "epoch": 1.28,
769
+ "learning_rate": 1.3585963327241742e-05,
770
+ "loss": 1.5127,
771
+ "step": 62500
772
+ },
773
+ {
774
+ "epoch": 1.29,
775
+ "learning_rate": 1.3534629679062033e-05,
776
+ "loss": 1.5035,
777
+ "step": 63000
778
+ },
779
+ {
780
+ "epoch": 1.3,
781
+ "learning_rate": 1.3483398698178682e-05,
782
+ "loss": 1.5129,
783
+ "step": 63500
784
+ },
785
+ {
786
+ "epoch": 1.31,
787
+ "learning_rate": 1.3432065049998975e-05,
788
+ "loss": 1.5134,
789
+ "step": 64000
790
+ },
791
+ {
792
+ "epoch": 1.32,
793
+ "learning_rate": 1.3380731401819264e-05,
794
+ "loss": 1.5131,
795
+ "step": 64500
796
+ },
797
+ {
798
+ "epoch": 1.33,
799
+ "learning_rate": 1.3329397753639557e-05,
800
+ "loss": 1.5008,
801
+ "step": 65000
802
+ },
803
+ {
804
+ "epoch": 1.34,
805
+ "learning_rate": 1.3278064105459846e-05,
806
+ "loss": 1.5022,
807
+ "step": 65500
808
+ },
809
+ {
810
+ "epoch": 1.36,
811
+ "learning_rate": 1.3226935791872857e-05,
812
+ "loss": 1.5003,
813
+ "step": 66000
814
+ },
815
+ {
816
+ "epoch": 1.37,
817
+ "learning_rate": 1.317560214369315e-05,
818
+ "loss": 1.519,
819
+ "step": 66500
820
+ },
821
+ {
822
+ "epoch": 1.38,
823
+ "learning_rate": 1.3124268495513439e-05,
824
+ "loss": 1.5058,
825
+ "step": 67000
826
+ },
827
+ {
828
+ "epoch": 1.39,
829
+ "learning_rate": 1.3072934847333732e-05,
830
+ "loss": 1.5252,
831
+ "step": 67500
832
+ },
833
+ {
834
+ "epoch": 1.4,
835
+ "learning_rate": 1.3021601199154024e-05,
836
+ "loss": 1.5102,
837
+ "step": 68000
838
+ },
839
+ {
840
+ "epoch": 1.41,
841
+ "learning_rate": 1.2970267550974314e-05,
842
+ "loss": 1.5059,
843
+ "step": 68500
844
+ },
845
+ {
846
+ "epoch": 1.42,
847
+ "learning_rate": 1.2918933902794606e-05,
848
+ "loss": 1.508,
849
+ "step": 69000
850
+ },
851
+ {
852
+ "epoch": 1.43,
853
+ "learning_rate": 1.2867600254614896e-05,
854
+ "loss": 1.4942,
855
+ "step": 69500
856
+ },
857
+ {
858
+ "epoch": 1.44,
859
+ "learning_rate": 1.2816266606435188e-05,
860
+ "loss": 1.5167,
861
+ "step": 70000
862
+ },
863
+ {
864
+ "epoch": 1.45,
865
+ "learning_rate": 1.2765035625551837e-05,
866
+ "loss": 1.5181,
867
+ "step": 70500
868
+ },
869
+ {
870
+ "epoch": 1.46,
871
+ "learning_rate": 1.2713701977372128e-05,
872
+ "loss": 1.508,
873
+ "step": 71000
874
+ },
875
+ {
876
+ "epoch": 1.47,
877
+ "learning_rate": 1.2662368329192421e-05,
878
+ "loss": 1.5116,
879
+ "step": 71500
880
+ },
881
+ {
882
+ "epoch": 1.48,
883
+ "learning_rate": 1.261103468101271e-05,
884
+ "loss": 1.5212,
885
+ "step": 72000
886
+ },
887
+ {
888
+ "epoch": 1.49,
889
+ "learning_rate": 1.2559701032833003e-05,
890
+ "loss": 1.4879,
891
+ "step": 72500
892
+ },
893
+ {
894
+ "epoch": 1.5,
895
+ "learning_rate": 1.2508470051949652e-05,
896
+ "loss": 1.5121,
897
+ "step": 73000
898
+ },
899
+ {
900
+ "epoch": 1.51,
901
+ "learning_rate": 1.2457136403769945e-05,
902
+ "loss": 1.5094,
903
+ "step": 73500
904
+ },
905
+ {
906
+ "epoch": 1.52,
907
+ "learning_rate": 1.2405802755590234e-05,
908
+ "loss": 1.511,
909
+ "step": 74000
910
+ },
911
+ {
912
+ "epoch": 1.53,
913
+ "learning_rate": 1.2354469107410527e-05,
914
+ "loss": 1.512,
915
+ "step": 74500
916
+ },
917
+ {
918
+ "epoch": 1.54,
919
+ "learning_rate": 1.2303135459230816e-05,
920
+ "loss": 1.4996,
921
+ "step": 75000
922
+ },
923
+ {
924
+ "epoch": 1.55,
925
+ "learning_rate": 1.2251904478347467e-05,
926
+ "loss": 1.5122,
927
+ "step": 75500
928
+ },
929
+ {
930
+ "epoch": 1.56,
931
+ "learning_rate": 1.220057083016776e-05,
932
+ "loss": 1.5067,
933
+ "step": 76000
934
+ },
935
+ {
936
+ "epoch": 1.57,
937
+ "learning_rate": 1.2149339849284409e-05,
938
+ "loss": 1.5081,
939
+ "step": 76500
940
+ },
941
+ {
942
+ "epoch": 1.58,
943
+ "learning_rate": 1.2098006201104702e-05,
944
+ "loss": 1.4931,
945
+ "step": 77000
946
+ },
947
+ {
948
+ "epoch": 1.59,
949
+ "learning_rate": 1.2046672552924991e-05,
950
+ "loss": 1.5017,
951
+ "step": 77500
952
+ },
953
+ {
954
+ "epoch": 1.6,
955
+ "learning_rate": 1.1995338904745284e-05,
956
+ "loss": 1.4951,
957
+ "step": 78000
958
+ },
959
+ {
960
+ "epoch": 1.61,
961
+ "learning_rate": 1.1944005256565573e-05,
962
+ "loss": 1.5041,
963
+ "step": 78500
964
+ },
965
+ {
966
+ "epoch": 1.62,
967
+ "learning_rate": 1.1892671608385866e-05,
968
+ "loss": 1.4977,
969
+ "step": 79000
970
+ },
971
+ {
972
+ "epoch": 1.63,
973
+ "learning_rate": 1.1841337960206157e-05,
974
+ "loss": 1.517,
975
+ "step": 79500
976
+ },
977
+ {
978
+ "epoch": 1.64,
979
+ "learning_rate": 1.1790004312026448e-05,
980
+ "loss": 1.5056,
981
+ "step": 80000
982
+ },
983
+ {
984
+ "epoch": 1.65,
985
+ "learning_rate": 1.1738670663846739e-05,
986
+ "loss": 1.4893,
987
+ "step": 80500
988
+ },
989
+ {
990
+ "epoch": 1.66,
991
+ "learning_rate": 1.168733701566703e-05,
992
+ "loss": 1.5125,
993
+ "step": 81000
994
+ },
995
+ {
996
+ "epoch": 1.67,
997
+ "learning_rate": 1.163610603478368e-05,
998
+ "loss": 1.5074,
999
+ "step": 81500
1000
+ },
1001
+ {
1002
+ "epoch": 1.68,
1003
+ "learning_rate": 1.1584772386603973e-05,
1004
+ "loss": 1.5206,
1005
+ "step": 82000
1006
+ },
1007
+ {
1008
+ "epoch": 1.69,
1009
+ "learning_rate": 1.1533438738424263e-05,
1010
+ "loss": 1.4934,
1011
+ "step": 82500
1012
+ },
1013
+ {
1014
+ "epoch": 1.7,
1015
+ "learning_rate": 1.1482105090244556e-05,
1016
+ "loss": 1.5052,
1017
+ "step": 83000
1018
+ },
1019
+ {
1020
+ "epoch": 1.71,
1021
+ "learning_rate": 1.1430771442064845e-05,
1022
+ "loss": 1.5036,
1023
+ "step": 83500
1024
+ },
1025
+ {
1026
+ "epoch": 1.72,
1027
+ "learning_rate": 1.1379437793885138e-05,
1028
+ "loss": 1.516,
1029
+ "step": 84000
1030
+ },
1031
+ {
1032
+ "epoch": 1.74,
1033
+ "learning_rate": 1.1328206813001787e-05,
1034
+ "loss": 1.4903,
1035
+ "step": 84500
1036
+ },
1037
+ {
1038
+ "epoch": 1.75,
1039
+ "learning_rate": 1.1276873164822078e-05,
1040
+ "loss": 1.4999,
1041
+ "step": 85000
1042
+ },
1043
+ {
1044
+ "epoch": 1.76,
1045
+ "learning_rate": 1.122553951664237e-05,
1046
+ "loss": 1.4822,
1047
+ "step": 85500
1048
+ },
1049
+ {
1050
+ "epoch": 1.77,
1051
+ "learning_rate": 1.117420586846266e-05,
1052
+ "loss": 1.5073,
1053
+ "step": 86000
1054
+ },
1055
+ {
1056
+ "epoch": 1.78,
1057
+ "learning_rate": 1.1122974887579312e-05,
1058
+ "loss": 1.5158,
1059
+ "step": 86500
1060
+ },
1061
+ {
1062
+ "epoch": 1.79,
1063
+ "learning_rate": 1.1071641239399602e-05,
1064
+ "loss": 1.4907,
1065
+ "step": 87000
1066
+ },
1067
+ {
1068
+ "epoch": 1.8,
1069
+ "learning_rate": 1.1020410258516254e-05,
1070
+ "loss": 1.5028,
1071
+ "step": 87500
1072
+ },
1073
+ {
1074
+ "epoch": 1.81,
1075
+ "learning_rate": 1.0969076610336545e-05,
1076
+ "loss": 1.4878,
1077
+ "step": 88000
1078
+ },
1079
+ {
1080
+ "epoch": 1.82,
1081
+ "learning_rate": 1.0917742962156836e-05,
1082
+ "loss": 1.5032,
1083
+ "step": 88500
1084
+ },
1085
+ {
1086
+ "epoch": 1.83,
1087
+ "learning_rate": 1.0866409313977127e-05,
1088
+ "loss": 1.5034,
1089
+ "step": 89000
1090
+ },
1091
+ {
1092
+ "epoch": 1.84,
1093
+ "learning_rate": 1.0815075665797418e-05,
1094
+ "loss": 1.5007,
1095
+ "step": 89500
1096
+ },
1097
+ {
1098
+ "epoch": 1.85,
1099
+ "learning_rate": 1.076374201761771e-05,
1100
+ "loss": 1.5083,
1101
+ "step": 90000
1102
+ },
1103
+ {
1104
+ "epoch": 1.86,
1105
+ "learning_rate": 1.0712408369438e-05,
1106
+ "loss": 1.5119,
1107
+ "step": 90500
1108
+ },
1109
+ {
1110
+ "epoch": 1.87,
1111
+ "learning_rate": 1.0661074721258291e-05,
1112
+ "loss": 1.5026,
1113
+ "step": 91000
1114
+ },
1115
+ {
1116
+ "epoch": 1.88,
1117
+ "learning_rate": 1.060984374037494e-05,
1118
+ "loss": 1.4866,
1119
+ "step": 91500
1120
+ },
1121
+ {
1122
+ "epoch": 1.89,
1123
+ "learning_rate": 1.0558612759491593e-05,
1124
+ "loss": 1.5006,
1125
+ "step": 92000
1126
+ },
1127
+ {
1128
+ "epoch": 1.9,
1129
+ "learning_rate": 1.0507279111311884e-05,
1130
+ "loss": 1.5035,
1131
+ "step": 92500
1132
+ },
1133
+ {
1134
+ "epoch": 1.91,
1135
+ "learning_rate": 1.0455945463132175e-05,
1136
+ "loss": 1.4945,
1137
+ "step": 93000
1138
+ },
1139
+ {
1140
+ "epoch": 1.92,
1141
+ "learning_rate": 1.0404611814952466e-05,
1142
+ "loss": 1.5115,
1143
+ "step": 93500
1144
+ },
1145
+ {
1146
+ "epoch": 1.93,
1147
+ "learning_rate": 1.0353278166772759e-05,
1148
+ "loss": 1.4946,
1149
+ "step": 94000
1150
+ },
1151
+ {
1152
+ "epoch": 1.94,
1153
+ "learning_rate": 1.0301944518593048e-05,
1154
+ "loss": 1.4847,
1155
+ "step": 94500
1156
+ },
1157
+ {
1158
+ "epoch": 1.95,
1159
+ "learning_rate": 1.025061087041334e-05,
1160
+ "loss": 1.4912,
1161
+ "step": 95000
1162
+ },
1163
+ {
1164
+ "epoch": 1.96,
1165
+ "learning_rate": 1.019927722223363e-05,
1166
+ "loss": 1.4912,
1167
+ "step": 95500
1168
+ },
1169
+ {
1170
+ "epoch": 1.97,
1171
+ "learning_rate": 1.0147943574053923e-05,
1172
+ "loss": 1.4949,
1173
+ "step": 96000
1174
+ },
1175
+ {
1176
+ "epoch": 1.98,
1177
+ "learning_rate": 1.0096712593170572e-05,
1178
+ "loss": 1.5093,
1179
+ "step": 96500
1180
+ },
1181
+ {
1182
+ "epoch": 1.99,
1183
+ "learning_rate": 1.0045378944990864e-05,
1184
+ "loss": 1.505,
1185
+ "step": 97000
1186
+ },
1187
+ {
1188
+ "epoch": 2.0,
1189
+ "eval_gen_len": 58.69,
1190
+ "eval_loss": 2.0827815532684326,
1191
+ "eval_rouge1": 23.2685,
1192
+ "eval_rouge2": 10.8781,
1193
+ "eval_rougeL": 20.0935,
1194
+ "eval_rougeLsum": 20.5489,
1195
+ "eval_runtime": 18.0831,
1196
+ "eval_samples_per_second": 5.53,
1197
+ "eval_steps_per_second": 0.387,
1198
+ "step": 97402
1199
+ },
1200
+ {
1201
+ "epoch": 2.0,
1202
+ "learning_rate": 9.994045296811154e-06,
1203
+ "loss": 1.4514,
1204
+ "step": 97500
1205
+ },
1206
+ {
1207
+ "epoch": 2.01,
1208
+ "learning_rate": 9.942711648631445e-06,
1209
+ "loss": 1.2686,
1210
+ "step": 98000
1211
+ },
1212
+ {
1213
+ "epoch": 2.02,
1214
+ "learning_rate": 9.891480667748097e-06,
1215
+ "loss": 1.269,
1216
+ "step": 98500
1217
+ },
1218
+ {
1219
+ "epoch": 2.03,
1220
+ "learning_rate": 9.840147019568388e-06,
1221
+ "loss": 1.2746,
1222
+ "step": 99000
1223
+ },
1224
+ {
1225
+ "epoch": 2.04,
1226
+ "learning_rate": 9.78881337138868e-06,
1227
+ "loss": 1.2654,
1228
+ "step": 99500
1229
+ },
1230
+ {
1231
+ "epoch": 2.05,
1232
+ "learning_rate": 9.73747972320897e-06,
1233
+ "loss": 1.2771,
1234
+ "step": 100000
1235
+ },
1236
+ {
1237
+ "epoch": 2.06,
1238
+ "learning_rate": 9.68624874232562e-06,
1239
+ "loss": 1.2854,
1240
+ "step": 100500
1241
+ },
1242
+ {
1243
+ "epoch": 2.07,
1244
+ "learning_rate": 9.63491509414591e-06,
1245
+ "loss": 1.269,
1246
+ "step": 101000
1247
+ },
1248
+ {
1249
+ "epoch": 2.08,
1250
+ "learning_rate": 9.583581445966203e-06,
1251
+ "loss": 1.2924,
1252
+ "step": 101500
1253
+ },
1254
+ {
1255
+ "epoch": 2.09,
1256
+ "learning_rate": 9.532350465082854e-06,
1257
+ "loss": 1.2741,
1258
+ "step": 102000
1259
+ },
1260
+ {
1261
+ "epoch": 2.1,
1262
+ "learning_rate": 9.481016816903145e-06,
1263
+ "loss": 1.2818,
1264
+ "step": 102500
1265
+ },
1266
+ {
1267
+ "epoch": 2.11,
1268
+ "learning_rate": 9.429683168723436e-06,
1269
+ "loss": 1.2764,
1270
+ "step": 103000
1271
+ },
1272
+ {
1273
+ "epoch": 2.13,
1274
+ "learning_rate": 9.378349520543727e-06,
1275
+ "loss": 1.2852,
1276
+ "step": 103500
1277
+ },
1278
+ {
1279
+ "epoch": 2.14,
1280
+ "learning_rate": 9.327015872364018e-06,
1281
+ "loss": 1.282,
1282
+ "step": 104000
1283
+ },
1284
+ {
1285
+ "epoch": 2.15,
1286
+ "learning_rate": 9.27568222418431e-06,
1287
+ "loss": 1.2787,
1288
+ "step": 104500
1289
+ },
1290
+ {
1291
+ "epoch": 2.16,
1292
+ "learning_rate": 9.22445124330096e-06,
1293
+ "loss": 1.2763,
1294
+ "step": 105000
1295
+ },
1296
+ {
1297
+ "epoch": 2.17,
1298
+ "learning_rate": 9.173117595121251e-06,
1299
+ "loss": 1.2758,
1300
+ "step": 105500
1301
+ },
1302
+ {
1303
+ "epoch": 2.18,
1304
+ "learning_rate": 9.121783946941542e-06,
1305
+ "loss": 1.2788,
1306
+ "step": 106000
1307
+ },
1308
+ {
1309
+ "epoch": 2.19,
1310
+ "learning_rate": 9.070450298761833e-06,
1311
+ "loss": 1.2916,
1312
+ "step": 106500
1313
+ },
1314
+ {
1315
+ "epoch": 2.2,
1316
+ "learning_rate": 9.019116650582124e-06,
1317
+ "loss": 1.285,
1318
+ "step": 107000
1319
+ },
1320
+ {
1321
+ "epoch": 2.21,
1322
+ "learning_rate": 8.967783002402417e-06,
1323
+ "loss": 1.2996,
1324
+ "step": 107500
1325
+ },
1326
+ {
1327
+ "epoch": 2.22,
1328
+ "learning_rate": 8.916552021519066e-06,
1329
+ "loss": 1.2857,
1330
+ "step": 108000
1331
+ },
1332
+ {
1333
+ "epoch": 2.23,
1334
+ "learning_rate": 8.865218373339357e-06,
1335
+ "loss": 1.2822,
1336
+ "step": 108500
1337
+ },
1338
+ {
1339
+ "epoch": 2.24,
1340
+ "learning_rate": 8.813884725159648e-06,
1341
+ "loss": 1.2912,
1342
+ "step": 109000
1343
+ },
1344
+ {
1345
+ "epoch": 2.25,
1346
+ "learning_rate": 8.762551076979939e-06,
1347
+ "loss": 1.2792,
1348
+ "step": 109500
1349
+ },
1350
+ {
1351
+ "epoch": 2.26,
1352
+ "learning_rate": 8.71121742880023e-06,
1353
+ "loss": 1.2979,
1354
+ "step": 110000
1355
+ },
1356
+ {
1357
+ "epoch": 2.27,
1358
+ "learning_rate": 8.659883780620521e-06,
1359
+ "loss": 1.2776,
1360
+ "step": 110500
1361
+ },
1362
+ {
1363
+ "epoch": 2.28,
1364
+ "learning_rate": 8.608550132440812e-06,
1365
+ "loss": 1.2855,
1366
+ "step": 111000
1367
+ },
1368
+ {
1369
+ "epoch": 2.29,
1370
+ "learning_rate": 8.557216484261103e-06,
1371
+ "loss": 1.2906,
1372
+ "step": 111500
1373
+ },
1374
+ {
1375
+ "epoch": 2.3,
1376
+ "learning_rate": 8.505985503377756e-06,
1377
+ "loss": 1.2918,
1378
+ "step": 112000
1379
+ },
1380
+ {
1381
+ "epoch": 2.31,
1382
+ "learning_rate": 8.454651855198047e-06,
1383
+ "loss": 1.2911,
1384
+ "step": 112500
1385
+ },
1386
+ {
1387
+ "epoch": 2.32,
1388
+ "learning_rate": 8.403318207018338e-06,
1389
+ "loss": 1.2917,
1390
+ "step": 113000
1391
+ },
1392
+ {
1393
+ "epoch": 2.33,
1394
+ "learning_rate": 8.351984558838629e-06,
1395
+ "loss": 1.2884,
1396
+ "step": 113500
1397
+ },
1398
+ {
1399
+ "epoch": 2.34,
1400
+ "learning_rate": 8.30065091065892e-06,
1401
+ "loss": 1.2882,
1402
+ "step": 114000
1403
+ },
1404
+ {
1405
+ "epoch": 2.35,
1406
+ "learning_rate": 8.24952259707193e-06,
1407
+ "loss": 1.282,
1408
+ "step": 114500
1409
+ },
1410
+ {
1411
+ "epoch": 2.36,
1412
+ "learning_rate": 8.198188948892221e-06,
1413
+ "loss": 1.2964,
1414
+ "step": 115000
1415
+ },
1416
+ {
1417
+ "epoch": 2.37,
1418
+ "learning_rate": 8.146855300712512e-06,
1419
+ "loss": 1.2933,
1420
+ "step": 115500
1421
+ },
1422
+ {
1423
+ "epoch": 2.38,
1424
+ "learning_rate": 8.095521652532803e-06,
1425
+ "loss": 1.2897,
1426
+ "step": 116000
1427
+ },
1428
+ {
1429
+ "epoch": 2.39,
1430
+ "learning_rate": 8.044290671649454e-06,
1431
+ "loss": 1.2811,
1432
+ "step": 116500
1433
+ },
1434
+ {
1435
+ "epoch": 2.4,
1436
+ "learning_rate": 7.992957023469745e-06,
1437
+ "loss": 1.281,
1438
+ "step": 117000
1439
+ },
1440
+ {
1441
+ "epoch": 2.41,
1442
+ "learning_rate": 7.941623375290036e-06,
1443
+ "loss": 1.2865,
1444
+ "step": 117500
1445
+ },
1446
+ {
1447
+ "epoch": 2.42,
1448
+ "learning_rate": 7.890289727110327e-06,
1449
+ "loss": 1.2891,
1450
+ "step": 118000
1451
+ },
1452
+ {
1453
+ "epoch": 2.43,
1454
+ "learning_rate": 7.838956078930618e-06,
1455
+ "loss": 1.287,
1456
+ "step": 118500
1457
+ },
1458
+ {
1459
+ "epoch": 2.44,
1460
+ "learning_rate": 7.787622430750909e-06,
1461
+ "loss": 1.283,
1462
+ "step": 119000
1463
+ },
1464
+ {
1465
+ "epoch": 2.45,
1466
+ "learning_rate": 7.7362887825712e-06,
1467
+ "loss": 1.278,
1468
+ "step": 119500
1469
+ },
1470
+ {
1471
+ "epoch": 2.46,
1472
+ "learning_rate": 7.684955134391491e-06,
1473
+ "loss": 1.2964,
1474
+ "step": 120000
1475
+ },
1476
+ {
1477
+ "epoch": 2.47,
1478
+ "learning_rate": 7.633621486211782e-06,
1479
+ "loss": 1.2913,
1480
+ "step": 120500
1481
+ },
1482
+ {
1483
+ "epoch": 2.48,
1484
+ "learning_rate": 7.582390505328433e-06,
1485
+ "loss": 1.2948,
1486
+ "step": 121000
1487
+ },
1488
+ {
1489
+ "epoch": 2.49,
1490
+ "learning_rate": 7.531056857148724e-06,
1491
+ "loss": 1.2951,
1492
+ "step": 121500
1493
+ },
1494
+ {
1495
+ "epoch": 2.51,
1496
+ "learning_rate": 7.479723208969015e-06,
1497
+ "loss": 1.2967,
1498
+ "step": 122000
1499
+ },
1500
+ {
1501
+ "epoch": 2.52,
1502
+ "learning_rate": 7.428389560789306e-06,
1503
+ "loss": 1.2929,
1504
+ "step": 122500
1505
+ },
1506
+ {
1507
+ "epoch": 2.53,
1508
+ "learning_rate": 7.377158579905958e-06,
1509
+ "loss": 1.2803,
1510
+ "step": 123000
1511
+ },
1512
+ {
1513
+ "epoch": 2.54,
1514
+ "learning_rate": 7.325824931726249e-06,
1515
+ "loss": 1.2916,
1516
+ "step": 123500
1517
+ },
1518
+ {
1519
+ "epoch": 2.55,
1520
+ "learning_rate": 7.27449128354654e-06,
1521
+ "loss": 1.294,
1522
+ "step": 124000
1523
+ },
1524
+ {
1525
+ "epoch": 2.56,
1526
+ "learning_rate": 7.223157635366831e-06,
1527
+ "loss": 1.2977,
1528
+ "step": 124500
1529
+ },
1530
+ {
1531
+ "epoch": 2.57,
1532
+ "learning_rate": 7.171823987187122e-06,
1533
+ "loss": 1.2951,
1534
+ "step": 125000
1535
+ },
1536
+ {
1537
+ "epoch": 2.58,
1538
+ "learning_rate": 7.120593006303772e-06,
1539
+ "loss": 1.2824,
1540
+ "step": 125500
1541
+ },
1542
+ {
1543
+ "epoch": 2.59,
1544
+ "learning_rate": 7.0692593581240645e-06,
1545
+ "loss": 1.2776,
1546
+ "step": 126000
1547
+ },
1548
+ {
1549
+ "epoch": 2.6,
1550
+ "learning_rate": 7.0179257099443555e-06,
1551
+ "loss": 1.2846,
1552
+ "step": 126500
1553
+ },
1554
+ {
1555
+ "epoch": 2.61,
1556
+ "learning_rate": 6.966694729061005e-06,
1557
+ "loss": 1.2986,
1558
+ "step": 127000
1559
+ },
1560
+ {
1561
+ "epoch": 2.62,
1562
+ "learning_rate": 6.9153610808812964e-06,
1563
+ "loss": 1.2913,
1564
+ "step": 127500
1565
+ },
1566
+ {
1567
+ "epoch": 2.63,
1568
+ "learning_rate": 6.8640274327015875e-06,
1569
+ "loss": 1.2924,
1570
+ "step": 128000
1571
+ },
1572
+ {
1573
+ "epoch": 2.64,
1574
+ "learning_rate": 6.8126937845218785e-06,
1575
+ "loss": 1.2834,
1576
+ "step": 128500
1577
+ },
1578
+ {
1579
+ "epoch": 2.65,
1580
+ "learning_rate": 6.7613601363421695e-06,
1581
+ "loss": 1.2963,
1582
+ "step": 129000
1583
+ },
1584
+ {
1585
+ "epoch": 2.66,
1586
+ "learning_rate": 6.710129155458821e-06,
1587
+ "loss": 1.2848,
1588
+ "step": 129500
1589
+ },
1590
+ {
1591
+ "epoch": 2.67,
1592
+ "learning_rate": 6.658795507279112e-06,
1593
+ "loss": 1.2774,
1594
+ "step": 130000
1595
+ },
1596
+ {
1597
+ "epoch": 2.68,
1598
+ "learning_rate": 6.607461859099403e-06,
1599
+ "loss": 1.303,
1600
+ "step": 130500
1601
+ },
1602
+ {
1603
+ "epoch": 2.69,
1604
+ "learning_rate": 6.556128210919694e-06,
1605
+ "loss": 1.2995,
1606
+ "step": 131000
1607
+ },
1608
+ {
1609
+ "epoch": 2.7,
1610
+ "learning_rate": 6.504794562739985e-06,
1611
+ "loss": 1.2789,
1612
+ "step": 131500
1613
+ },
1614
+ {
1615
+ "epoch": 2.71,
1616
+ "learning_rate": 6.453460914560276e-06,
1617
+ "loss": 1.2873,
1618
+ "step": 132000
1619
+ },
1620
+ {
1621
+ "epoch": 2.72,
1622
+ "learning_rate": 6.402127266380568e-06,
1623
+ "loss": 1.2966,
1624
+ "step": 132500
1625
+ },
1626
+ {
1627
+ "epoch": 2.73,
1628
+ "learning_rate": 6.350793618200859e-06,
1629
+ "loss": 1.2904,
1630
+ "step": 133000
1631
+ },
1632
+ {
1633
+ "epoch": 2.74,
1634
+ "learning_rate": 6.299562637317509e-06,
1635
+ "loss": 1.2777,
1636
+ "step": 133500
1637
+ },
1638
+ {
1639
+ "epoch": 2.75,
1640
+ "learning_rate": 6.2482289891378e-06,
1641
+ "loss": 1.2784,
1642
+ "step": 134000
1643
+ },
1644
+ {
1645
+ "epoch": 2.76,
1646
+ "learning_rate": 6.196895340958091e-06,
1647
+ "loss": 1.2644,
1648
+ "step": 134500
1649
+ },
1650
+ {
1651
+ "epoch": 2.77,
1652
+ "learning_rate": 6.145561692778382e-06,
1653
+ "loss": 1.2932,
1654
+ "step": 135000
1655
+ },
1656
+ {
1657
+ "epoch": 2.78,
1658
+ "learning_rate": 6.094330711895034e-06,
1659
+ "loss": 1.2863,
1660
+ "step": 135500
1661
+ },
1662
+ {
1663
+ "epoch": 2.79,
1664
+ "learning_rate": 6.042997063715325e-06,
1665
+ "loss": 1.3051,
1666
+ "step": 136000
1667
+ },
1668
+ {
1669
+ "epoch": 2.8,
1670
+ "learning_rate": 5.991766082831976e-06,
1671
+ "loss": 1.2866,
1672
+ "step": 136500
1673
+ },
1674
+ {
1675
+ "epoch": 2.81,
1676
+ "learning_rate": 5.940432434652267e-06,
1677
+ "loss": 1.2989,
1678
+ "step": 137000
1679
+ },
1680
+ {
1681
+ "epoch": 2.82,
1682
+ "learning_rate": 5.889098786472558e-06,
1683
+ "loss": 1.2696,
1684
+ "step": 137500
1685
+ },
1686
+ {
1687
+ "epoch": 2.83,
1688
+ "learning_rate": 5.837765138292849e-06,
1689
+ "loss": 1.3041,
1690
+ "step": 138000
1691
+ },
1692
+ {
1693
+ "epoch": 2.84,
1694
+ "learning_rate": 5.786431490113141e-06,
1695
+ "loss": 1.2941,
1696
+ "step": 138500
1697
+ },
1698
+ {
1699
+ "epoch": 2.85,
1700
+ "learning_rate": 5.735097841933432e-06,
1701
+ "loss": 1.2806,
1702
+ "step": 139000
1703
+ },
1704
+ {
1705
+ "epoch": 2.86,
1706
+ "learning_rate": 5.683764193753723e-06,
1707
+ "loss": 1.298,
1708
+ "step": 139500
1709
+ },
1710
+ {
1711
+ "epoch": 2.87,
1712
+ "learning_rate": 5.632430545574014e-06,
1713
+ "loss": 1.2748,
1714
+ "step": 140000
1715
+ },
1716
+ {
1717
+ "epoch": 2.88,
1718
+ "learning_rate": 5.581199564690664e-06,
1719
+ "loss": 1.2838,
1720
+ "step": 140500
1721
+ },
1722
+ {
1723
+ "epoch": 2.9,
1724
+ "learning_rate": 5.529968583807314e-06,
1725
+ "loss": 1.298,
1726
+ "step": 141000
1727
+ },
1728
+ {
1729
+ "epoch": 2.91,
1730
+ "learning_rate": 5.478634935627606e-06,
1731
+ "loss": 1.2884,
1732
+ "step": 141500
1733
+ },
1734
+ {
1735
+ "epoch": 2.92,
1736
+ "learning_rate": 5.427301287447897e-06,
1737
+ "loss": 1.2849,
1738
+ "step": 142000
1739
+ },
1740
+ {
1741
+ "epoch": 2.93,
1742
+ "learning_rate": 5.375967639268188e-06,
1743
+ "loss": 1.2805,
1744
+ "step": 142500
1745
+ },
1746
+ {
1747
+ "epoch": 2.94,
1748
+ "learning_rate": 5.324633991088479e-06,
1749
+ "loss": 1.2881,
1750
+ "step": 143000
1751
+ },
1752
+ {
1753
+ "epoch": 2.95,
1754
+ "learning_rate": 5.27330034290877e-06,
1755
+ "loss": 1.2981,
1756
+ "step": 143500
1757
+ },
1758
+ {
1759
+ "epoch": 2.96,
1760
+ "learning_rate": 5.221966694729061e-06,
1761
+ "loss": 1.2751,
1762
+ "step": 144000
1763
+ },
1764
+ {
1765
+ "epoch": 2.97,
1766
+ "learning_rate": 5.1706330465493524e-06,
1767
+ "loss": 1.2822,
1768
+ "step": 144500
1769
+ },
1770
+ {
1771
+ "epoch": 2.98,
1772
+ "learning_rate": 5.119402065666002e-06,
1773
+ "loss": 1.3016,
1774
+ "step": 145000
1775
+ },
1776
+ {
1777
+ "epoch": 2.99,
1778
+ "learning_rate": 5.068068417486295e-06,
1779
+ "loss": 1.2856,
1780
+ "step": 145500
1781
+ },
1782
+ {
1783
+ "epoch": 3.0,
1784
+ "learning_rate": 5.016734769306586e-06,
1785
+ "loss": 1.2799,
1786
+ "step": 146000
1787
+ },
1788
+ {
1789
+ "epoch": 3.0,
1790
+ "eval_gen_len": 58.57,
1791
+ "eval_loss": 2.115015983581543,
1792
+ "eval_rouge1": 23.0677,
1793
+ "eval_rouge2": 10.5086,
1794
+ "eval_rougeL": 19.9851,
1795
+ "eval_rougeLsum": 20.6597,
1796
+ "eval_runtime": 18.1865,
1797
+ "eval_samples_per_second": 5.499,
1798
+ "eval_steps_per_second": 0.385,
1799
+ "step": 146103
1800
+ },
1801
+ {
1802
+ "epoch": 3.01,
1803
+ "learning_rate": 4.965401121126876e-06,
1804
+ "loss": 1.153,
1805
+ "step": 146500
1806
+ },
1807
+ {
1808
+ "epoch": 3.02,
1809
+ "learning_rate": 4.914067472947167e-06,
1810
+ "loss": 1.1045,
1811
+ "step": 147000
1812
+ },
1813
+ {
1814
+ "epoch": 3.03,
1815
+ "learning_rate": 4.862836492063818e-06,
1816
+ "loss": 1.1333,
1817
+ "step": 147500
1818
+ },
1819
+ {
1820
+ "epoch": 3.04,
1821
+ "learning_rate": 4.811502843884109e-06,
1822
+ "loss": 1.1321,
1823
+ "step": 148000
1824
+ },
1825
+ {
1826
+ "epoch": 3.05,
1827
+ "learning_rate": 4.7601691957044e-06,
1828
+ "loss": 1.1175,
1829
+ "step": 148500
1830
+ },
1831
+ {
1832
+ "epoch": 3.06,
1833
+ "learning_rate": 4.708835547524692e-06,
1834
+ "loss": 1.1196,
1835
+ "step": 149000
1836
+ },
1837
+ {
1838
+ "epoch": 3.07,
1839
+ "learning_rate": 4.657501899344983e-06,
1840
+ "loss": 1.1221,
1841
+ "step": 149500
1842
+ },
1843
+ {
1844
+ "epoch": 3.08,
1845
+ "learning_rate": 4.606168251165274e-06,
1846
+ "loss": 1.1262,
1847
+ "step": 150000
1848
+ },
1849
+ {
1850
+ "epoch": 3.09,
1851
+ "learning_rate": 4.554834602985566e-06,
1852
+ "loss": 1.1147,
1853
+ "step": 150500
1854
+ },
1855
+ {
1856
+ "epoch": 3.1,
1857
+ "learning_rate": 4.503603622102216e-06,
1858
+ "loss": 1.1275,
1859
+ "step": 151000
1860
+ },
1861
+ {
1862
+ "epoch": 3.11,
1863
+ "learning_rate": 4.452269973922507e-06,
1864
+ "loss": 1.1155,
1865
+ "step": 151500
1866
+ },
1867
+ {
1868
+ "epoch": 3.12,
1869
+ "learning_rate": 4.400936325742799e-06,
1870
+ "loss": 1.1162,
1871
+ "step": 152000
1872
+ },
1873
+ {
1874
+ "epoch": 3.13,
1875
+ "learning_rate": 4.34960267756309e-06,
1876
+ "loss": 1.1204,
1877
+ "step": 152500
1878
+ },
1879
+ {
1880
+ "epoch": 3.14,
1881
+ "learning_rate": 4.298269029383381e-06,
1882
+ "loss": 1.1135,
1883
+ "step": 153000
1884
+ },
1885
+ {
1886
+ "epoch": 3.15,
1887
+ "learning_rate": 4.246935381203672e-06,
1888
+ "loss": 1.1153,
1889
+ "step": 153500
1890
+ },
1891
+ {
1892
+ "epoch": 3.16,
1893
+ "learning_rate": 4.195601733023963e-06,
1894
+ "loss": 1.1198,
1895
+ "step": 154000
1896
+ },
1897
+ {
1898
+ "epoch": 3.17,
1899
+ "learning_rate": 4.144268084844254e-06,
1900
+ "loss": 1.1327,
1901
+ "step": 154500
1902
+ },
1903
+ {
1904
+ "epoch": 3.18,
1905
+ "learning_rate": 4.092934436664545e-06,
1906
+ "loss": 1.1097,
1907
+ "step": 155000
1908
+ },
1909
+ {
1910
+ "epoch": 3.19,
1911
+ "learning_rate": 4.041703455781196e-06,
1912
+ "loss": 1.1378,
1913
+ "step": 155500
1914
+ },
1915
+ {
1916
+ "epoch": 3.2,
1917
+ "learning_rate": 3.990369807601487e-06,
1918
+ "loss": 1.1282,
1919
+ "step": 156000
1920
+ },
1921
+ {
1922
+ "epoch": 3.21,
1923
+ "learning_rate": 3.939036159421778e-06,
1924
+ "loss": 1.1256,
1925
+ "step": 156500
1926
+ },
1927
+ {
1928
+ "epoch": 3.22,
1929
+ "learning_rate": 3.8878051785384286e-06,
1930
+ "loss": 1.1362,
1931
+ "step": 157000
1932
+ },
1933
+ {
1934
+ "epoch": 3.23,
1935
+ "learning_rate": 3.83647153035872e-06,
1936
+ "loss": 1.1225,
1937
+ "step": 157500
1938
+ },
1939
+ {
1940
+ "epoch": 3.24,
1941
+ "learning_rate": 3.7851378821790106e-06,
1942
+ "loss": 1.1247,
1943
+ "step": 158000
1944
+ },
1945
+ {
1946
+ "epoch": 3.25,
1947
+ "learning_rate": 3.733804233999302e-06,
1948
+ "loss": 1.1327,
1949
+ "step": 158500
1950
+ },
1951
+ {
1952
+ "epoch": 3.26,
1953
+ "learning_rate": 3.6824705858195936e-06,
1954
+ "loss": 1.1277,
1955
+ "step": 159000
1956
+ },
1957
+ {
1958
+ "epoch": 3.28,
1959
+ "learning_rate": 3.6311369376398846e-06,
1960
+ "loss": 1.1271,
1961
+ "step": 159500
1962
+ },
1963
+ {
1964
+ "epoch": 3.29,
1965
+ "learning_rate": 3.579803289460176e-06,
1966
+ "loss": 1.1356,
1967
+ "step": 160000
1968
+ },
1969
+ {
1970
+ "epoch": 3.3,
1971
+ "learning_rate": 3.528469641280467e-06,
1972
+ "loss": 1.1197,
1973
+ "step": 160500
1974
+ },
1975
+ {
1976
+ "epoch": 3.31,
1977
+ "learning_rate": 3.477341327693477e-06,
1978
+ "loss": 1.1391,
1979
+ "step": 161000
1980
+ },
1981
+ {
1982
+ "epoch": 3.32,
1983
+ "learning_rate": 3.426007679513768e-06,
1984
+ "loss": 1.1144,
1985
+ "step": 161500
1986
+ },
1987
+ {
1988
+ "epoch": 3.33,
1989
+ "learning_rate": 3.374674031334059e-06,
1990
+ "loss": 1.1291,
1991
+ "step": 162000
1992
+ },
1993
+ {
1994
+ "epoch": 3.34,
1995
+ "learning_rate": 3.3233403831543502e-06,
1996
+ "loss": 1.1129,
1997
+ "step": 162500
1998
+ },
1999
+ {
2000
+ "epoch": 3.35,
2001
+ "learning_rate": 3.2720067349746417e-06,
2002
+ "loss": 1.1279,
2003
+ "step": 163000
2004
+ },
2005
+ {
2006
+ "epoch": 3.36,
2007
+ "learning_rate": 3.2206730867949327e-06,
2008
+ "loss": 1.1118,
2009
+ "step": 163500
2010
+ },
2011
+ {
2012
+ "epoch": 3.37,
2013
+ "learning_rate": 3.169442105911583e-06,
2014
+ "loss": 1.1295,
2015
+ "step": 164000
2016
+ },
2017
+ {
2018
+ "epoch": 3.38,
2019
+ "learning_rate": 3.1181084577318745e-06,
2020
+ "loss": 1.1182,
2021
+ "step": 164500
2022
+ },
2023
+ {
2024
+ "epoch": 3.39,
2025
+ "learning_rate": 3.0667748095521655e-06,
2026
+ "loss": 1.1283,
2027
+ "step": 165000
2028
+ },
2029
+ {
2030
+ "epoch": 3.4,
2031
+ "learning_rate": 3.0154411613724566e-06,
2032
+ "loss": 1.1112,
2033
+ "step": 165500
2034
+ },
2035
+ {
2036
+ "epoch": 3.41,
2037
+ "learning_rate": 2.9641075131927476e-06,
2038
+ "loss": 1.1313,
2039
+ "step": 166000
2040
+ },
2041
+ {
2042
+ "epoch": 3.42,
2043
+ "learning_rate": 2.9127738650130386e-06,
2044
+ "loss": 1.1318,
2045
+ "step": 166500
2046
+ },
2047
+ {
2048
+ "epoch": 3.43,
2049
+ "learning_rate": 2.86144021683333e-06,
2050
+ "loss": 1.1199,
2051
+ "step": 167000
2052
+ },
2053
+ {
2054
+ "epoch": 3.44,
2055
+ "learning_rate": 2.810106568653621e-06,
2056
+ "loss": 1.1231,
2057
+ "step": 167500
2058
+ },
2059
+ {
2060
+ "epoch": 3.45,
2061
+ "learning_rate": 2.758772920473912e-06,
2062
+ "loss": 1.1332,
2063
+ "step": 168000
2064
+ },
2065
+ {
2066
+ "epoch": 3.46,
2067
+ "learning_rate": 2.7076446068869222e-06,
2068
+ "loss": 1.1247,
2069
+ "step": 168500
2070
+ },
2071
+ {
2072
+ "epoch": 3.47,
2073
+ "learning_rate": 2.656310958707214e-06,
2074
+ "loss": 1.1271,
2075
+ "step": 169000
2076
+ },
2077
+ {
2078
+ "epoch": 3.48,
2079
+ "learning_rate": 2.604977310527505e-06,
2080
+ "loss": 1.1131,
2081
+ "step": 169500
2082
+ },
2083
+ {
2084
+ "epoch": 3.49,
2085
+ "learning_rate": 2.553643662347796e-06,
2086
+ "loss": 1.1231,
2087
+ "step": 170000
2088
+ },
2089
+ {
2090
+ "epoch": 3.5,
2091
+ "learning_rate": 2.502412681464447e-06,
2092
+ "loss": 1.1218,
2093
+ "step": 170500
2094
+ },
2095
+ {
2096
+ "epoch": 3.51,
2097
+ "learning_rate": 2.4510790332847375e-06,
2098
+ "loss": 1.1151,
2099
+ "step": 171000
2100
+ },
2101
+ {
2102
+ "epoch": 3.52,
2103
+ "learning_rate": 2.399745385105029e-06,
2104
+ "loss": 1.1153,
2105
+ "step": 171500
2106
+ },
2107
+ {
2108
+ "epoch": 3.53,
2109
+ "learning_rate": 2.34841173692532e-06,
2110
+ "loss": 1.1159,
2111
+ "step": 172000
2112
+ },
2113
+ {
2114
+ "epoch": 3.54,
2115
+ "learning_rate": 2.297078088745611e-06,
2116
+ "loss": 1.1214,
2117
+ "step": 172500
2118
+ },
2119
+ {
2120
+ "epoch": 3.55,
2121
+ "learning_rate": 2.2457444405659025e-06,
2122
+ "loss": 1.1259,
2123
+ "step": 173000
2124
+ },
2125
+ {
2126
+ "epoch": 3.56,
2127
+ "learning_rate": 2.1944107923861935e-06,
2128
+ "loss": 1.1348,
2129
+ "step": 173500
2130
+ },
2131
+ {
2132
+ "epoch": 3.57,
2133
+ "learning_rate": 2.1430771442064846e-06,
2134
+ "loss": 1.121,
2135
+ "step": 174000
2136
+ },
2137
+ {
2138
+ "epoch": 3.58,
2139
+ "learning_rate": 2.0918461633231353e-06,
2140
+ "loss": 1.1177,
2141
+ "step": 174500
2142
+ },
2143
+ {
2144
+ "epoch": 3.59,
2145
+ "learning_rate": 2.0405125151434264e-06,
2146
+ "loss": 1.1201,
2147
+ "step": 175000
2148
+ },
2149
+ {
2150
+ "epoch": 3.6,
2151
+ "learning_rate": 1.9891788669637174e-06,
2152
+ "loss": 1.1316,
2153
+ "step": 175500
2154
+ },
2155
+ {
2156
+ "epoch": 3.61,
2157
+ "learning_rate": 1.937845218784009e-06,
2158
+ "loss": 1.1277,
2159
+ "step": 176000
2160
+ },
2161
+ {
2162
+ "epoch": 3.62,
2163
+ "learning_rate": 1.8865115706042999e-06,
2164
+ "loss": 1.1211,
2165
+ "step": 176500
2166
+ },
2167
+ {
2168
+ "epoch": 3.63,
2169
+ "learning_rate": 1.8351779224245911e-06,
2170
+ "loss": 1.1206,
2171
+ "step": 177000
2172
+ },
2173
+ {
2174
+ "epoch": 3.64,
2175
+ "learning_rate": 1.7839469415412417e-06,
2176
+ "loss": 1.1177,
2177
+ "step": 177500
2178
+ },
2179
+ {
2180
+ "epoch": 3.65,
2181
+ "learning_rate": 1.7326132933615327e-06,
2182
+ "loss": 1.127,
2183
+ "step": 178000
2184
+ },
2185
+ {
2186
+ "epoch": 3.67,
2187
+ "learning_rate": 1.681279645181824e-06,
2188
+ "loss": 1.1204,
2189
+ "step": 178500
2190
+ },
2191
+ {
2192
+ "epoch": 3.68,
2193
+ "learning_rate": 1.629945997002115e-06,
2194
+ "loss": 1.126,
2195
+ "step": 179000
2196
+ },
2197
+ {
2198
+ "epoch": 3.69,
2199
+ "learning_rate": 1.5787150161187655e-06,
2200
+ "loss": 1.1168,
2201
+ "step": 179500
2202
+ },
2203
+ {
2204
+ "epoch": 3.7,
2205
+ "learning_rate": 1.527484035235416e-06,
2206
+ "loss": 1.1124,
2207
+ "step": 180000
2208
+ },
2209
+ {
2210
+ "epoch": 3.71,
2211
+ "learning_rate": 1.4761503870557073e-06,
2212
+ "loss": 1.1112,
2213
+ "step": 180500
2214
+ },
2215
+ {
2216
+ "epoch": 3.72,
2217
+ "learning_rate": 1.4248167388759983e-06,
2218
+ "loss": 1.1164,
2219
+ "step": 181000
2220
+ },
2221
+ {
2222
+ "epoch": 3.73,
2223
+ "learning_rate": 1.3734830906962898e-06,
2224
+ "loss": 1.1178,
2225
+ "step": 181500
2226
+ },
2227
+ {
2228
+ "epoch": 3.74,
2229
+ "learning_rate": 1.322149442516581e-06,
2230
+ "loss": 1.1106,
2231
+ "step": 182000
2232
+ },
2233
+ {
2234
+ "epoch": 3.75,
2235
+ "learning_rate": 1.270815794336872e-06,
2236
+ "loss": 1.127,
2237
+ "step": 182500
2238
+ },
2239
+ {
2240
+ "epoch": 3.76,
2241
+ "learning_rate": 1.2194821461571631e-06,
2242
+ "loss": 1.1225,
2243
+ "step": 183000
2244
+ },
2245
+ {
2246
+ "epoch": 3.77,
2247
+ "learning_rate": 1.1681484979774544e-06,
2248
+ "loss": 1.1211,
2249
+ "step": 183500
2250
+ },
2251
+ {
2252
+ "epoch": 3.78,
2253
+ "learning_rate": 1.116917517094105e-06,
2254
+ "loss": 1.1109,
2255
+ "step": 184000
2256
+ },
2257
+ {
2258
+ "epoch": 3.79,
2259
+ "learning_rate": 1.0655838689143961e-06,
2260
+ "loss": 1.1099,
2261
+ "step": 184500
2262
+ },
2263
+ {
2264
+ "epoch": 3.8,
2265
+ "learning_rate": 1.0142502207346874e-06,
2266
+ "loss": 1.1094,
2267
+ "step": 185000
2268
+ },
2269
+ {
2270
+ "epoch": 3.81,
2271
+ "learning_rate": 9.629165725549784e-07,
2272
+ "loss": 1.1355,
2273
+ "step": 185500
2274
+ },
2275
+ {
2276
+ "epoch": 3.82,
2277
+ "learning_rate": 9.115829243752696e-07,
2278
+ "loss": 1.112,
2279
+ "step": 186000
2280
+ },
2281
+ {
2282
+ "epoch": 3.83,
2283
+ "learning_rate": 8.603519434919201e-07,
2284
+ "loss": 1.1186,
2285
+ "step": 186500
2286
+ },
2287
+ {
2288
+ "epoch": 3.84,
2289
+ "learning_rate": 8.090182953122114e-07,
2290
+ "loss": 1.1284,
2291
+ "step": 187000
2292
+ },
2293
+ {
2294
+ "epoch": 3.85,
2295
+ "learning_rate": 7.576846471325025e-07,
2296
+ "loss": 1.1177,
2297
+ "step": 187500
2298
+ },
2299
+ {
2300
+ "epoch": 3.86,
2301
+ "learning_rate": 7.063509989527936e-07,
2302
+ "loss": 1.1208,
2303
+ "step": 188000
2304
+ },
2305
+ {
2306
+ "epoch": 3.87,
2307
+ "learning_rate": 6.550173507730848e-07,
2308
+ "loss": 1.1142,
2309
+ "step": 188500
2310
+ },
2311
+ {
2312
+ "epoch": 3.88,
2313
+ "learning_rate": 6.037863698897354e-07,
2314
+ "loss": 1.1091,
2315
+ "step": 189000
2316
+ },
2317
+ {
2318
+ "epoch": 3.89,
2319
+ "learning_rate": 5.524527217100266e-07,
2320
+ "loss": 1.1055,
2321
+ "step": 189500
2322
+ },
2323
+ {
2324
+ "epoch": 3.9,
2325
+ "learning_rate": 5.011190735303177e-07,
2326
+ "loss": 1.1321,
2327
+ "step": 190000
2328
+ },
2329
+ {
2330
+ "epoch": 3.91,
2331
+ "learning_rate": 4.4978542535060883e-07,
2332
+ "loss": 1.1177,
2333
+ "step": 190500
2334
+ },
2335
+ {
2336
+ "epoch": 3.92,
2337
+ "learning_rate": 3.9855444446725943e-07,
2338
+ "loss": 1.1174,
2339
+ "step": 191000
2340
+ },
2341
+ {
2342
+ "epoch": 3.93,
2343
+ "learning_rate": 3.4732346358391e-07,
2344
+ "loss": 1.1177,
2345
+ "step": 191500
2346
+ },
2347
+ {
2348
+ "epoch": 3.94,
2349
+ "learning_rate": 2.9598981540420117e-07,
2350
+ "loss": 1.1043,
2351
+ "step": 192000
2352
+ },
2353
+ {
2354
+ "epoch": 3.95,
2355
+ "learning_rate": 2.4465616722449236e-07,
2356
+ "loss": 1.1063,
2357
+ "step": 192500
2358
+ },
2359
+ {
2360
+ "epoch": 3.96,
2361
+ "learning_rate": 1.933225190447835e-07,
2362
+ "loss": 1.1233,
2363
+ "step": 193000
2364
+ },
2365
+ {
2366
+ "epoch": 3.97,
2367
+ "learning_rate": 1.4198887086507466e-07,
2368
+ "loss": 1.1186,
2369
+ "step": 193500
2370
+ },
2371
+ {
2372
+ "epoch": 3.98,
2373
+ "learning_rate": 9.06552226853658e-08,
2374
+ "loss": 1.1049,
2375
+ "step": 194000
2376
+ },
2377
+ {
2378
+ "epoch": 3.99,
2379
+ "learning_rate": 3.9424241802016386e-08,
2380
+ "loss": 1.1201,
2381
+ "step": 194500
2382
+ },
2383
+ {
2384
+ "epoch": 4.0,
2385
+ "eval_gen_len": 58.68,
2386
+ "eval_loss": 2.1514933109283447,
2387
+ "eval_rouge1": 22.8624,
2388
+ "eval_rouge2": 10.7296,
2389
+ "eval_rougeL": 20.1567,
2390
+ "eval_rougeLsum": 20.6088,
2391
+ "eval_runtime": 18.3644,
2392
+ "eval_samples_per_second": 5.445,
2393
+ "eval_steps_per_second": 0.381,
2394
+ "step": 194804
2395
+ }
2396
+ ],
2397
+ "max_steps": 194804,
2398
+ "num_train_epochs": 4,
2399
+ "total_flos": 6.740081427542286e+18,
2400
+ "trial_name": null,
2401
+ "trial_params": null
2402
+ }
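
trainer_state.json records one training log entry every 500 optimizer steps plus an evaluation entry at each epoch boundary (steps 48701, 97402, 146103, 194804). The best eval_loss (about 2.05) comes from the epoch-1 checkpoint at step 48701, while ROUGE-1/ROUGE-2 peak around epoch 2 even as eval_loss rises. A short sketch for pulling the per-epoch evaluation metrics out of the file, assuming a local copy of trainer_state.json:

```python
# Sketch: extract the per-epoch evaluation entries from trainer_state.json.
import json

with open("trainer_state.json", "r", encoding="utf-8") as fh:
    state = json.load(fh)

evals = [e for e in state["log_history"] if "eval_loss" in e]
for e in evals:
    print(
        f"epoch {e['epoch']:.0f}  step {e['step']:>6}  "
        f"eval_loss {e['eval_loss']:.4f}  rouge1 {e['eval_rouge1']:.2f}  "
        f"rouge2 {e['eval_rouge2']:.2f}  rougeL {e['eval_rougeL']:.2f}"
    )

print("best checkpoint:", state["best_model_checkpoint"], "eval_loss:", state["best_metric"])
```
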
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75e68f53ec4cdf2c579dc6d93417811fd4fcb0cb70cb7d71a7fef37f3550a5a9
+ size 3439
vocab.json ADDED
The diff for this file is too large to render. See raw diff