eyalmazuz commited on
Commit
d94fc55
1 Parent(s): adae9ee

Upload 12 files

Browse files
config.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/home/patrick/hugging_face/t5/t5-v1_1-base",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "d_ff": 2048,
7
+ "d_kv": 64,
8
+ "d_model": 768,
9
+ "decoder_start_token_id": 2,
10
+ "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
+ "eos_token_id": 3,
13
+ "feed_forward_proj": "gated-gelu",
14
+ "initializer_factor": 1.0,
15
+ "is_encoder_decoder": true,
16
+ "is_gated_act": true,
17
+ "layer_norm_epsilon": 1e-06,
18
+ "model_type": "t5",
19
+ "num_decoder_layers": 12,
20
+ "num_heads": 12,
21
+ "num_layers": 12,
22
+ "output_past": true,
23
+ "pad_token_id": 0,
24
+ "relative_attention_max_distance": 128,
25
+ "relative_attention_num_buckets": 32,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "float32",
28
+ "transformers_version": "4.27.0.dev0",
29
+ "use_cache": true,
30
+ "vocab_size": 32100
31
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
1
+ {
2
+ "decoder_start_token_id": 2,
3
+ "eos_token_id": 3,
4
+ "pad_token_id": 0,
5
+ "transformers_version": "4.27.0.dev0"
6
+ }
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b16c95c61840cbbe54a30ae0e087f3cbd6174776cb1c76e91a5679fa69090ff5
3
+ size 1980446085
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48bd864639eec9191c8c85143c2142848a6794486d885911e2cb2d92477161c8
3
+ size 990236853
rng_state_0.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99f75dfea40d7e90412835f0e7db3bc5b2df07f8fc16c850a7462ab37be91251
3
+ size 14583
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e70b5945654ca35a76139b5908295396538ad29607a8bc18ef8b73cb68670f6a
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "bos_token": "<s>",
105
+ "eos_token": "</s>",
106
+ "pad_token": "<pad>",
107
+ "unk_token": "<unk>"
108
+ }
spiece.model ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270da11f1d731cd2234015d5c233dba7c5b5996a466fbf74e24f7ea332924b93
3
+ size 882013
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
tokenizer_config.json ADDED
@@ -0,0 +1,113 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<extra_id_0>",
4
+ "<extra_id_1>",
5
+ "<extra_id_2>",
6
+ "<extra_id_3>",
7
+ "<extra_id_4>",
8
+ "<extra_id_5>",
9
+ "<extra_id_6>",
10
+ "<extra_id_7>",
11
+ "<extra_id_8>",
12
+ "<extra_id_9>",
13
+ "<extra_id_10>",
14
+ "<extra_id_11>",
15
+ "<extra_id_12>",
16
+ "<extra_id_13>",
17
+ "<extra_id_14>",
18
+ "<extra_id_15>",
19
+ "<extra_id_16>",
20
+ "<extra_id_17>",
21
+ "<extra_id_18>",
22
+ "<extra_id_19>",
23
+ "<extra_id_20>",
24
+ "<extra_id_21>",
25
+ "<extra_id_22>",
26
+ "<extra_id_23>",
27
+ "<extra_id_24>",
28
+ "<extra_id_25>",
29
+ "<extra_id_26>",
30
+ "<extra_id_27>",
31
+ "<extra_id_28>",
32
+ "<extra_id_29>",
33
+ "<extra_id_30>",
34
+ "<extra_id_31>",
35
+ "<extra_id_32>",
36
+ "<extra_id_33>",
37
+ "<extra_id_34>",
38
+ "<extra_id_35>",
39
+ "<extra_id_36>",
40
+ "<extra_id_37>",
41
+ "<extra_id_38>",
42
+ "<extra_id_39>",
43
+ "<extra_id_40>",
44
+ "<extra_id_41>",
45
+ "<extra_id_42>",
46
+ "<extra_id_43>",
47
+ "<extra_id_44>",
48
+ "<extra_id_45>",
49
+ "<extra_id_46>",
50
+ "<extra_id_47>",
51
+ "<extra_id_48>",
52
+ "<extra_id_49>",
53
+ "<extra_id_50>",
54
+ "<extra_id_51>",
55
+ "<extra_id_52>",
56
+ "<extra_id_53>",
57
+ "<extra_id_54>",
58
+ "<extra_id_55>",
59
+ "<extra_id_56>",
60
+ "<extra_id_57>",
61
+ "<extra_id_58>",
62
+ "<extra_id_59>",
63
+ "<extra_id_60>",
64
+ "<extra_id_61>",
65
+ "<extra_id_62>",
66
+ "<extra_id_63>",
67
+ "<extra_id_64>",
68
+ "<extra_id_65>",
69
+ "<extra_id_66>",
70
+ "<extra_id_67>",
71
+ "<extra_id_68>",
72
+ "<extra_id_69>",
73
+ "<extra_id_70>",
74
+ "<extra_id_71>",
75
+ "<extra_id_72>",
76
+ "<extra_id_73>",
77
+ "<extra_id_74>",
78
+ "<extra_id_75>",
79
+ "<extra_id_76>",
80
+ "<extra_id_77>",
81
+ "<extra_id_78>",
82
+ "<extra_id_79>",
83
+ "<extra_id_80>",
84
+ "<extra_id_81>",
85
+ "<extra_id_82>",
86
+ "<extra_id_83>",
87
+ "<extra_id_84>",
88
+ "<extra_id_85>",
89
+ "<extra_id_86>",
90
+ "<extra_id_87>",
91
+ "<extra_id_88>",
92
+ "<extra_id_89>",
93
+ "<extra_id_90>",
94
+ "<extra_id_91>",
95
+ "<extra_id_92>",
96
+ "<extra_id_93>",
97
+ "<extra_id_94>",
98
+ "<extra_id_95>",
99
+ "<extra_id_96>",
100
+ "<extra_id_97>",
101
+ "<extra_id_98>",
102
+ "<extra_id_99>"
103
+ ],
104
+ "bos_token": "<s>",
105
+ "eos_token": "</s>",
106
+ "extra_ids": 100,
107
+ "model_max_length": 1000000000000000019884624838656,
108
+ "pad_token": "<pad>",
109
+ "sp_model_kwargs": {},
110
+ "special_tokens_map_file": "./T5Tokenizer/special_tokens_map.json",
111
+ "tokenizer_class": "T5Tokenizer",
112
+ "unk_token": "<unk>"
113
+ }
trainer_state.json ADDED
@@ -0,0 +1,1738 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.739096280610677,
5
+ "global_step": 21000,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.27,
12
+ "learning_rate": 0.00015,
13
+ "loss": 46.3847,
14
+ "step": 1000
15
+ },
16
+ {
17
+ "epoch": 0.27,
18
+ "eval_validation_chrf_beta": 2,
19
+ "eval_validation_chrf_char_order": 6,
20
+ "eval_validation_chrf_score": 6.927323312955705,
21
+ "eval_validation_chrf_word_order": 2,
22
+ "eval_validation_loss": 16.81111717224121,
23
+ "eval_validation_meteor": 0.06701502044580518,
24
+ "eval_validation_runtime": 40.1943,
25
+ "eval_validation_samples_per_second": 24.805,
26
+ "eval_validation_scarebleu_bp": 1.0,
27
+ "eval_validation_scarebleu_counts": [
28
+ 2501,
29
+ 140,
30
+ 21,
31
+ 3
32
+ ],
33
+ "eval_validation_scarebleu_precisions": [
34
+ 9.256106587712805,
35
+ 0.5373248896564958,
36
+ 0.08352889702080267,
37
+ 0.01236603462489695
38
+ ],
39
+ "eval_validation_scarebleu_ref_len": 19414,
40
+ "eval_validation_scarebleu_score": 0.26772142473423427,
41
+ "eval_validation_scarebleu_sys_len": 27020,
42
+ "eval_validation_scarebleu_totals": [
43
+ 27020,
44
+ 26055,
45
+ 25141,
46
+ 24260
47
+ ],
48
+ "eval_validation_steps_per_second": 0.398,
49
+ "eval_validation_ter_num_edits": 24081,
50
+ "eval_validation_ter_ref_length": 16729.0,
51
+ "eval_validation_ter_score": 143.9476358419511,
52
+ "step": 1000
53
+ },
54
+ {
55
+ "epoch": 0.27,
56
+ "eval_test_chrf_beta": 2,
57
+ "eval_test_chrf_char_order": 6,
58
+ "eval_test_chrf_score": 6.879629526302676,
59
+ "eval_test_chrf_word_order": 2,
60
+ "eval_test_loss": 16.940214157104492,
61
+ "eval_test_meteor": 0.06578125012212357,
62
+ "eval_test_runtime": 43.5255,
63
+ "eval_test_samples_per_second": 23.251,
64
+ "eval_test_scarebleu_bp": 1.0,
65
+ "eval_test_scarebleu_counts": [
66
+ 2620,
67
+ 181,
68
+ 21,
69
+ 1
70
+ ],
71
+ "eval_test_scarebleu_precisions": [
72
+ 8.875639418679494,
73
+ 0.6341087443946188,
74
+ 0.07605113533480606,
75
+ 0.0037420948246828577
76
+ ],
77
+ "eval_test_scarebleu_ref_len": 20238,
78
+ "eval_test_scarebleu_score": 0.20005341558923276,
79
+ "eval_test_scarebleu_sys_len": 29519,
80
+ "eval_test_scarebleu_totals": [
81
+ 29519,
82
+ 28544,
83
+ 27613,
84
+ 26723
85
+ ],
86
+ "eval_test_steps_per_second": 0.368,
87
+ "eval_test_ter_num_edits": 25915,
88
+ "eval_test_ter_ref_length": 17367.0,
89
+ "eval_test_ter_score": 149.21978464904703,
90
+ "step": 1000
91
+ },
92
+ {
93
+ "epoch": 0.55,
94
+ "learning_rate": 0.0003,
95
+ "loss": 19.9672,
96
+ "step": 2000
97
+ },
98
+ {
99
+ "epoch": 0.55,
100
+ "eval_validation_chrf_beta": 2,
101
+ "eval_validation_chrf_char_order": 6,
102
+ "eval_validation_chrf_score": 6.608664701301832,
103
+ "eval_validation_chrf_word_order": 2,
104
+ "eval_validation_loss": 9.209909439086914,
105
+ "eval_validation_meteor": 0.06402750263419245,
106
+ "eval_validation_runtime": 39.5005,
107
+ "eval_validation_samples_per_second": 25.24,
108
+ "eval_validation_scarebleu_bp": 0.7877829309778815,
109
+ "eval_validation_scarebleu_counts": [
110
+ 2144,
111
+ 200,
112
+ 35,
113
+ 2
114
+ ],
115
+ "eval_validation_scarebleu_precisions": [
116
+ 13.677830940988835,
117
+ 1.352356481168436,
118
+ 0.2506624650862995,
119
+ 0.015055706112616682
120
+ ],
121
+ "eval_validation_scarebleu_ref_len": 19414,
122
+ "eval_validation_scarebleu_score": 0.4049312368014021,
123
+ "eval_validation_scarebleu_sys_len": 15675,
124
+ "eval_validation_scarebleu_totals": [
125
+ 15675,
126
+ 14789,
127
+ 13963,
128
+ 13284
129
+ ],
130
+ "eval_validation_steps_per_second": 0.405,
131
+ "eval_validation_ter_num_edits": 19406,
132
+ "eval_validation_ter_ref_length": 16729.0,
133
+ "eval_validation_ter_score": 116.00215195170065,
134
+ "step": 2000
135
+ },
136
+ {
137
+ "epoch": 0.55,
138
+ "eval_test_chrf_beta": 2,
139
+ "eval_test_chrf_char_order": 6,
140
+ "eval_test_chrf_score": 6.872536195708606,
141
+ "eval_test_chrf_word_order": 2,
142
+ "eval_test_loss": 9.260172843933105,
143
+ "eval_test_meteor": 0.06539736281948932,
144
+ "eval_test_runtime": 42.6967,
145
+ "eval_test_samples_per_second": 23.702,
146
+ "eval_test_scarebleu_bp": 0.8237885182366754,
147
+ "eval_test_scarebleu_counts": [
148
+ 2294,
149
+ 254,
150
+ 51,
151
+ 9
152
+ ],
153
+ "eval_test_scarebleu_precisions": [
154
+ 13.532326569136385,
155
+ 1.582258767831558,
156
+ 0.33537186821858356,
157
+ 0.06213324128408699
158
+ ],
159
+ "eval_test_scarebleu_ref_len": 20238,
160
+ "eval_test_scarebleu_score": 0.6732725214400193,
161
+ "eval_test_scarebleu_sys_len": 16952,
162
+ "eval_test_scarebleu_totals": [
163
+ 16952,
164
+ 16053,
165
+ 15207,
166
+ 14485
167
+ ],
168
+ "eval_test_steps_per_second": 0.375,
169
+ "eval_test_ter_num_edits": 20720,
170
+ "eval_test_ter_ref_length": 17367.0,
171
+ "eval_test_ter_score": 119.30673115679163,
172
+ "step": 2000
173
+ },
174
+ {
175
+ "epoch": 0.82,
176
+ "learning_rate": 0.0002999986101989992,
177
+ "loss": 8.7409,
178
+ "step": 3000
179
+ },
180
+ {
181
+ "epoch": 0.82,
182
+ "eval_validation_chrf_beta": 2,
183
+ "eval_validation_chrf_char_order": 6,
184
+ "eval_validation_chrf_score": 13.23574850542808,
185
+ "eval_validation_chrf_word_order": 2,
186
+ "eval_validation_loss": 6.500675201416016,
187
+ "eval_validation_meteor": 0.1196379582028911,
188
+ "eval_validation_runtime": 40.0578,
189
+ "eval_validation_samples_per_second": 24.889,
190
+ "eval_validation_scarebleu_bp": 1.0,
191
+ "eval_validation_scarebleu_counts": [
192
+ 4007,
193
+ 528,
194
+ 99,
195
+ 14
196
+ ],
197
+ "eval_validation_scarebleu_precisions": [
198
+ 12.693635758862102,
199
+ 1.7262799973844243,
200
+ 0.334256195556756,
201
+ 0.04885538805136795
202
+ ],
203
+ "eval_validation_scarebleu_ref_len": 19414,
204
+ "eval_validation_scarebleu_score": 0.7734322925143389,
205
+ "eval_validation_scarebleu_sys_len": 31567,
206
+ "eval_validation_scarebleu_totals": [
207
+ 31567,
208
+ 30586,
209
+ 29618,
210
+ 28656
211
+ ],
212
+ "eval_validation_steps_per_second": 0.399,
213
+ "eval_validation_ter_num_edits": 22118,
214
+ "eval_validation_ter_ref_length": 16729.0,
215
+ "eval_validation_ter_score": 132.21352142985234,
216
+ "step": 3000
217
+ },
218
+ {
219
+ "epoch": 0.82,
220
+ "eval_test_chrf_beta": 2,
221
+ "eval_test_chrf_char_order": 6,
222
+ "eval_test_chrf_score": 13.044816904787435,
223
+ "eval_test_chrf_word_order": 2,
224
+ "eval_test_loss": 6.5189080238342285,
225
+ "eval_test_meteor": 0.11759299622619233,
226
+ "eval_test_runtime": 39.9506,
227
+ "eval_test_samples_per_second": 25.331,
228
+ "eval_test_scarebleu_bp": 1.0,
229
+ "eval_test_scarebleu_counts": [
230
+ 4139,
231
+ 590,
232
+ 121,
233
+ 25
234
+ ],
235
+ "eval_test_scarebleu_precisions": [
236
+ 12.661364331599877,
237
+ 1.8612574529164958,
238
+ 0.39385456675997654,
239
+ 0.08403361344537816
240
+ ],
241
+ "eval_test_scarebleu_ref_len": 20238,
242
+ "eval_test_scarebleu_score": 0.9397643584535246,
243
+ "eval_test_scarebleu_sys_len": 32690,
244
+ "eval_test_scarebleu_totals": [
245
+ 32690,
246
+ 31699,
247
+ 30722,
248
+ 29750
249
+ ],
250
+ "eval_test_steps_per_second": 0.4,
251
+ "eval_test_ter_num_edits": 23506,
252
+ "eval_test_ter_ref_length": 17367.0,
253
+ "eval_test_ter_score": 135.34864973800887,
254
+ "step": 3000
255
+ },
256
+ {
257
+ "epoch": 1.09,
258
+ "learning_rate": 0.00029999444082175083,
259
+ "loss": 5.8284,
260
+ "step": 4000
261
+ },
262
+ {
263
+ "epoch": 1.09,
264
+ "eval_validation_chrf_beta": 2,
265
+ "eval_validation_chrf_char_order": 6,
266
+ "eval_validation_chrf_score": 15.697678446835747,
267
+ "eval_validation_chrf_word_order": 2,
268
+ "eval_validation_loss": 5.613649368286133,
269
+ "eval_validation_meteor": 0.14151503184835326,
270
+ "eval_validation_runtime": 37.324,
271
+ "eval_validation_samples_per_second": 26.712,
272
+ "eval_validation_scarebleu_bp": 1.0,
273
+ "eval_validation_scarebleu_counts": [
274
+ 4311,
275
+ 658,
276
+ 141,
277
+ 30
278
+ ],
279
+ "eval_validation_scarebleu_precisions": [
280
+ 21.276280722534793,
281
+ 3.413925495486147,
282
+ 0.7710395362826051,
283
+ 0.17339035949601203
284
+ ],
285
+ "eval_validation_scarebleu_ref_len": 19414,
286
+ "eval_validation_scarebleu_score": 1.7652768928036136,
287
+ "eval_validation_scarebleu_sys_len": 20262,
288
+ "eval_validation_scarebleu_totals": [
289
+ 20262,
290
+ 19274,
291
+ 18287,
292
+ 17302
293
+ ],
294
+ "eval_validation_steps_per_second": 0.429,
295
+ "eval_validation_ter_num_edits": 18183,
296
+ "eval_validation_ter_ref_length": 16729.0,
297
+ "eval_validation_ter_score": 108.69149381313885,
298
+ "step": 4000
299
+ },
300
+ {
301
+ "epoch": 1.09,
302
+ "eval_test_chrf_beta": 2,
303
+ "eval_test_chrf_char_order": 6,
304
+ "eval_test_chrf_score": 15.824281404133039,
305
+ "eval_test_chrf_word_order": 2,
306
+ "eval_test_loss": 5.644526481628418,
307
+ "eval_test_meteor": 0.14262643610521655,
308
+ "eval_test_runtime": 37.2514,
309
+ "eval_test_samples_per_second": 27.167,
310
+ "eval_test_scarebleu_bp": 1.0,
311
+ "eval_test_scarebleu_counts": [
312
+ 4480,
313
+ 750,
314
+ 151,
315
+ 21
316
+ ],
317
+ "eval_test_scarebleu_precisions": [
318
+ 21.725425537073857,
319
+ 3.8230196758079313,
320
+ 0.8109995166228047,
321
+ 0.11913541725761616
322
+ ],
323
+ "eval_test_scarebleu_ref_len": 20238,
324
+ "eval_test_scarebleu_score": 1.683096735458044,
325
+ "eval_test_scarebleu_sys_len": 20621,
326
+ "eval_test_scarebleu_totals": [
327
+ 20621,
328
+ 19618,
329
+ 18619,
330
+ 17627
331
+ ],
332
+ "eval_test_steps_per_second": 0.43,
333
+ "eval_test_ter_num_edits": 18915,
334
+ "eval_test_ter_ref_length": 17367.0,
335
+ "eval_test_ter_score": 108.91345655553637,
336
+ "step": 4000
337
+ },
338
+ {
339
+ "epoch": 1.37,
340
+ "learning_rate": 0.0002999874919455162,
341
+ "loss": 4.9666,
342
+ "step": 5000
343
+ },
344
+ {
345
+ "epoch": 1.37,
346
+ "eval_validation_chrf_beta": 2,
347
+ "eval_validation_chrf_char_order": 6,
348
+ "eval_validation_chrf_score": 19.94602423909563,
349
+ "eval_validation_chrf_word_order": 2,
350
+ "eval_validation_loss": 5.06654691696167,
351
+ "eval_validation_meteor": 0.16677714234198524,
352
+ "eval_validation_runtime": 35.8549,
353
+ "eval_validation_samples_per_second": 27.806,
354
+ "eval_validation_scarebleu_bp": 1.0,
355
+ "eval_validation_scarebleu_counts": [
356
+ 5239,
357
+ 885,
358
+ 180,
359
+ 35
360
+ ],
361
+ "eval_validation_scarebleu_precisions": [
362
+ 20.370154360589446,
363
+ 3.579807458943451,
364
+ 0.7586933614330874,
365
+ 0.1539815222173339
366
+ ],
367
+ "eval_validation_scarebleu_ref_len": 19414,
368
+ "eval_validation_scarebleu_score": 1.70843012306825,
369
+ "eval_validation_scarebleu_sys_len": 25719,
370
+ "eval_validation_scarebleu_totals": [
371
+ 25719,
372
+ 24722,
373
+ 23725,
374
+ 22730
375
+ ],
376
+ "eval_validation_steps_per_second": 0.446,
377
+ "eval_validation_ter_num_edits": 20131,
378
+ "eval_validation_ter_ref_length": 16729.0,
379
+ "eval_validation_ter_score": 120.3359435710443,
380
+ "step": 5000
381
+ },
382
+ {
383
+ "epoch": 1.37,
384
+ "eval_test_chrf_beta": 2,
385
+ "eval_test_chrf_char_order": 6,
386
+ "eval_test_chrf_score": 19.73223665848102,
387
+ "eval_test_chrf_word_order": 2,
388
+ "eval_test_loss": 5.101978302001953,
389
+ "eval_test_meteor": 0.16941489704708929,
390
+ "eval_test_runtime": 40.2976,
391
+ "eval_test_samples_per_second": 25.113,
392
+ "eval_test_scarebleu_bp": 1.0,
393
+ "eval_test_scarebleu_counts": [
394
+ 5479,
395
+ 997,
396
+ 226,
397
+ 45
398
+ ],
399
+ "eval_test_scarebleu_precisions": [
400
+ 19.717853672580702,
401
+ 3.723622782446312,
402
+ 0.8771929824561403,
403
+ 0.18179614592170645
404
+ ],
405
+ "eval_test_scarebleu_ref_len": 20238,
406
+ "eval_test_scarebleu_score": 1.8498064298140473,
407
+ "eval_test_scarebleu_sys_len": 27787,
408
+ "eval_test_scarebleu_totals": [
409
+ 27787,
410
+ 26775,
411
+ 25764,
412
+ 24753
413
+ ],
414
+ "eval_test_steps_per_second": 0.397,
415
+ "eval_test_ter_num_edits": 21806,
416
+ "eval_test_ter_ref_length": 17367.0,
417
+ "eval_test_ter_score": 125.55997005815627,
418
+ "step": 5000
419
+ },
420
+ {
421
+ "epoch": 1.64,
422
+ "learning_rate": 0.00029997776369906286,
423
+ "loss": 4.4311,
424
+ "step": 6000
425
+ },
426
+ {
427
+ "epoch": 1.64,
428
+ "eval_validation_chrf_beta": 2,
429
+ "eval_validation_chrf_char_order": 6,
430
+ "eval_validation_chrf_score": 24.59127986581791,
431
+ "eval_validation_chrf_word_order": 2,
432
+ "eval_validation_loss": 4.524135112762451,
433
+ "eval_validation_meteor": 0.2102906308603433,
434
+ "eval_validation_runtime": 31.5423,
435
+ "eval_validation_samples_per_second": 31.608,
436
+ "eval_validation_scarebleu_bp": 1.0,
437
+ "eval_validation_scarebleu_counts": [
438
+ 5964,
439
+ 1335,
440
+ 396,
441
+ 121
442
+ ],
443
+ "eval_validation_scarebleu_precisions": [
444
+ 28.030267424918925,
445
+ 6.5828402366863905,
446
+ 2.053622361665716,
447
+ 0.6617084108060811
448
+ ],
449
+ "eval_validation_scarebleu_ref_len": 19414,
450
+ "eval_validation_scarebleu_score": 3.979302446849127,
451
+ "eval_validation_scarebleu_sys_len": 21277,
452
+ "eval_validation_scarebleu_totals": [
453
+ 21277,
454
+ 20280,
455
+ 19283,
456
+ 18286
457
+ ],
458
+ "eval_validation_steps_per_second": 0.507,
459
+ "eval_validation_ter_num_edits": 17139,
460
+ "eval_validation_ter_ref_length": 16729.0,
461
+ "eval_validation_ter_score": 102.450833881284,
462
+ "step": 6000
463
+ },
464
+ {
465
+ "epoch": 1.64,
466
+ "eval_test_chrf_beta": 2,
467
+ "eval_test_chrf_char_order": 6,
468
+ "eval_test_chrf_score": 24.41919902051296,
469
+ "eval_test_chrf_word_order": 2,
470
+ "eval_test_loss": 4.5522613525390625,
471
+ "eval_test_meteor": 0.2072945229916662,
472
+ "eval_test_runtime": 34.2408,
473
+ "eval_test_samples_per_second": 29.555,
474
+ "eval_test_scarebleu_bp": 1.0,
475
+ "eval_test_scarebleu_counts": [
476
+ 6159,
477
+ 1354,
478
+ 372,
479
+ 84
480
+ ],
481
+ "eval_test_scarebleu_precisions": [
482
+ 27.975109011627907,
483
+ 6.446391163587888,
484
+ 1.8607442977190876,
485
+ 0.44252449689179224
486
+ ],
487
+ "eval_test_scarebleu_ref_len": 20238,
488
+ "eval_test_scarebleu_score": 3.4908252929483026,
489
+ "eval_test_scarebleu_sys_len": 22016,
490
+ "eval_test_scarebleu_totals": [
491
+ 22016,
492
+ 21004,
493
+ 19992,
494
+ 18982
495
+ ],
496
+ "eval_test_steps_per_second": 0.467,
497
+ "eval_test_ter_num_edits": 17741,
498
+ "eval_test_ter_ref_length": 17367.0,
499
+ "eval_test_ter_score": 102.15350952956757,
500
+ "step": 6000
501
+ },
502
+ {
503
+ "epoch": 1.91,
504
+ "learning_rate": 0.00029996525626266166,
505
+ "loss": 3.8914,
506
+ "step": 7000
507
+ },
508
+ {
509
+ "epoch": 1.91,
510
+ "eval_validation_chrf_beta": 2,
511
+ "eval_validation_chrf_char_order": 6,
512
+ "eval_validation_chrf_score": 28.001456752933812,
513
+ "eval_validation_chrf_word_order": 2,
514
+ "eval_validation_loss": 4.09087610244751,
515
+ "eval_validation_meteor": 0.246925162242717,
516
+ "eval_validation_runtime": 25.3477,
517
+ "eval_validation_samples_per_second": 39.333,
518
+ "eval_validation_scarebleu_bp": 0.9367877834918877,
519
+ "eval_validation_scarebleu_counts": [
520
+ 6414,
521
+ 1774,
522
+ 615,
523
+ 220
524
+ ],
525
+ "eval_validation_scarebleu_precisions": [
526
+ 35.19534679543459,
527
+ 10.297190619921059,
528
+ 3.7888122227698373,
529
+ 1.4439485429246521
530
+ ],
531
+ "eval_validation_scarebleu_ref_len": 19414,
532
+ "eval_validation_scarebleu_score": 6.251092400807247,
533
+ "eval_validation_scarebleu_sys_len": 18224,
534
+ "eval_validation_scarebleu_totals": [
535
+ 18224,
536
+ 17228,
537
+ 16232,
538
+ 15236
539
+ ],
540
+ "eval_validation_steps_per_second": 0.631,
541
+ "eval_validation_ter_num_edits": 14680,
542
+ "eval_validation_ter_ref_length": 16729.0,
543
+ "eval_validation_ter_score": 87.75180823719289,
544
+ "step": 7000
545
+ },
546
+ {
547
+ "epoch": 1.91,
548
+ "eval_test_chrf_beta": 2,
549
+ "eval_test_chrf_char_order": 6,
550
+ "eval_test_chrf_score": 27.737380160414922,
551
+ "eval_test_chrf_word_order": 2,
552
+ "eval_test_loss": 4.100297451019287,
553
+ "eval_test_meteor": 0.23872126072037225,
554
+ "eval_test_runtime": 27.793,
555
+ "eval_test_samples_per_second": 36.412,
556
+ "eval_test_scarebleu_bp": 0.9496588509867531,
557
+ "eval_test_scarebleu_counts": [
558
+ 6585,
559
+ 1745,
560
+ 575,
561
+ 175
562
+ ],
563
+ "eval_test_scarebleu_precisions": [
564
+ 34.21845770110164,
565
+ 9.571083808688021,
566
+ 3.3391405342624854,
567
+ 1.0796471096304523
568
+ ],
569
+ "eval_test_scarebleu_ref_len": 20238,
570
+ "eval_test_scarebleu_score": 5.566755396990573,
571
+ "eval_test_scarebleu_sys_len": 19244,
572
+ "eval_test_scarebleu_totals": [
573
+ 19244,
574
+ 18232,
575
+ 17220,
576
+ 16209
577
+ ],
578
+ "eval_test_steps_per_second": 0.576,
579
+ "eval_test_ter_num_edits": 15506,
580
+ "eval_test_ter_ref_length": 17367.0,
581
+ "eval_test_ter_score": 89.28427477399666,
582
+ "step": 7000
583
+ },
584
+ {
585
+ "epoch": 2.19,
586
+ "learning_rate": 0.000299949969868084,
587
+ "loss": 3.4344,
588
+ "step": 8000
589
+ },
590
+ {
591
+ "epoch": 2.19,
592
+ "eval_validation_chrf_beta": 2,
593
+ "eval_validation_chrf_char_order": 6,
594
+ "eval_validation_chrf_score": 31.29398951552832,
595
+ "eval_validation_chrf_word_order": 2,
596
+ "eval_validation_loss": 3.7306714057922363,
597
+ "eval_validation_meteor": 0.27820026125205044,
598
+ "eval_validation_runtime": 18.376,
599
+ "eval_validation_samples_per_second": 54.256,
600
+ "eval_validation_scarebleu_bp": 0.9889681301158993,
601
+ "eval_validation_scarebleu_counts": [
602
+ 7069,
603
+ 2158,
604
+ 789,
605
+ 302
606
+ ],
607
+ "eval_validation_scarebleu_precisions": [
608
+ 36.81579084422686,
609
+ 11.854537464293562,
610
+ 4.585343174289533,
611
+ 1.8630475015422578
612
+ ],
613
+ "eval_validation_scarebleu_ref_len": 19414,
614
+ "eval_validation_scarebleu_score": 7.727889909602072,
615
+ "eval_validation_scarebleu_sys_len": 19201,
616
+ "eval_validation_scarebleu_totals": [
617
+ 19201,
618
+ 18204,
619
+ 17207,
620
+ 16210
621
+ ],
622
+ "eval_validation_steps_per_second": 0.871,
623
+ "eval_validation_ter_num_edits": 14351,
624
+ "eval_validation_ter_ref_length": 16729.0,
625
+ "eval_validation_ter_score": 85.78516348855281,
626
+ "step": 8000
627
+ },
628
+ {
629
+ "epoch": 2.19,
630
+ "eval_test_chrf_beta": 2,
631
+ "eval_test_chrf_char_order": 6,
632
+ "eval_test_chrf_score": 31.913351815650685,
633
+ "eval_test_chrf_word_order": 2,
634
+ "eval_test_loss": 3.728149890899658,
635
+ "eval_test_meteor": 0.2797376842090434,
636
+ "eval_test_runtime": 22.9959,
637
+ "eval_test_samples_per_second": 44.008,
638
+ "eval_test_scarebleu_bp": 0.9872701879268588,
639
+ "eval_test_scarebleu_counts": [
640
+ 7428,
641
+ 2283,
642
+ 851,
643
+ 319
644
+ ],
645
+ "eval_test_scarebleu_precisions": [
646
+ 37.17345611049945,
647
+ 12.034791776489193,
648
+ 4.738835059583472,
649
+ 1.882450135725245
650
+ ],
651
+ "eval_test_scarebleu_ref_len": 20238,
652
+ "eval_test_scarebleu_score": 7.846982277886522,
653
+ "eval_test_scarebleu_sys_len": 19982,
654
+ "eval_test_scarebleu_totals": [
655
+ 19982,
656
+ 18970,
657
+ 17958,
658
+ 16946
659
+ ],
660
+ "eval_test_steps_per_second": 0.696,
661
+ "eval_test_ter_num_edits": 14894,
662
+ "eval_test_ter_ref_length": 17367.0,
663
+ "eval_test_ter_score": 85.76035008924973,
664
+ "step": 8000
665
+ },
666
+ {
667
+ "epoch": 2.46,
668
+ "learning_rate": 0.0002999319047985972,
669
+ "loss": 3.1209,
670
+ "step": 9000
671
+ },
672
+ {
673
+ "epoch": 2.46,
674
+ "eval_validation_chrf_beta": 2,
675
+ "eval_validation_chrf_char_order": 6,
676
+ "eval_validation_chrf_score": 33.55090391489323,
677
+ "eval_validation_chrf_word_order": 2,
678
+ "eval_validation_loss": 3.4695987701416016,
679
+ "eval_validation_meteor": 0.2976359335029889,
680
+ "eval_validation_runtime": 21.7643,
681
+ "eval_validation_samples_per_second": 45.809,
682
+ "eval_validation_scarebleu_bp": 1.0,
683
+ "eval_validation_scarebleu_counts": [
684
+ 7463,
685
+ 2467,
686
+ 1011,
687
+ 418
688
+ ],
689
+ "eval_validation_scarebleu_precisions": [
690
+ 37.90056370930882,
691
+ 13.196747619557078,
692
+ 5.712832683505679,
693
+ 2.502994011976048
694
+ ],
695
+ "eval_validation_scarebleu_ref_len": 19414,
696
+ "eval_validation_scarebleu_score": 9.196148464160226,
697
+ "eval_validation_scarebleu_sys_len": 19691,
698
+ "eval_validation_scarebleu_totals": [
699
+ 19691,
700
+ 18694,
701
+ 17697,
702
+ 16700
703
+ ],
704
+ "eval_validation_steps_per_second": 0.735,
705
+ "eval_validation_ter_num_edits": 14205,
706
+ "eval_validation_ter_ref_length": 16729.0,
707
+ "eval_validation_ter_score": 84.9124275210712,
708
+ "step": 9000
709
+ },
710
+ {
711
+ "epoch": 2.46,
712
+ "eval_test_chrf_beta": 2,
713
+ "eval_test_chrf_char_order": 6,
714
+ "eval_test_chrf_score": 33.9716211383833,
715
+ "eval_test_chrf_word_order": 2,
716
+ "eval_test_loss": 3.4928810596466064,
717
+ "eval_test_meteor": 0.3022870093526768,
718
+ "eval_test_runtime": 25.0697,
719
+ "eval_test_samples_per_second": 40.367,
720
+ "eval_test_scarebleu_bp": 1.0,
721
+ "eval_test_scarebleu_counts": [
722
+ 7869,
723
+ 2633,
724
+ 1054,
725
+ 427
726
+ ],
727
+ "eval_test_scarebleu_precisions": [
728
+ 38.3741343996879,
729
+ 13.506720016415308,
730
+ 5.702846012336328,
731
+ 2.444190040068689
732
+ ],
733
+ "eval_test_scarebleu_ref_len": 20238,
734
+ "eval_test_scarebleu_score": 9.219423736514582,
735
+ "eval_test_scarebleu_sys_len": 20506,
736
+ "eval_test_scarebleu_totals": [
737
+ 20506,
738
+ 19494,
739
+ 18482,
740
+ 17470
741
+ ],
742
+ "eval_test_steps_per_second": 0.638,
743
+ "eval_test_ter_num_edits": 14739,
744
+ "eval_test_ter_ref_length": 17367.0,
745
+ "eval_test_ter_score": 84.86785282432199,
746
+ "step": 9000
747
+ },
748
+ {
749
+ "epoch": 2.73,
750
+ "learning_rate": 0.00029991106138895916,
751
+ "loss": 2.9118,
752
+ "step": 10000
753
+ },
754
+ {
755
+ "epoch": 2.73,
756
+ "eval_validation_chrf_beta": 2,
757
+ "eval_validation_chrf_char_order": 6,
758
+ "eval_validation_chrf_score": 34.57697226346535,
759
+ "eval_validation_chrf_word_order": 2,
760
+ "eval_validation_loss": 3.2827696800231934,
761
+ "eval_validation_meteor": 0.30655372675156295,
762
+ "eval_validation_runtime": 21.2242,
763
+ "eval_validation_samples_per_second": 46.975,
764
+ "eval_validation_scarebleu_bp": 1.0,
765
+ "eval_validation_scarebleu_counts": [
766
+ 7614,
767
+ 2604,
768
+ 1072,
769
+ 445
770
+ ],
771
+ "eval_validation_scarebleu_precisions": [
772
+ 39.094269870609985,
773
+ 14.091671627252557,
774
+ 6.132021507836632,
775
+ 2.6994237185319987
776
+ ],
777
+ "eval_validation_scarebleu_ref_len": 19414,
778
+ "eval_validation_scarebleu_score": 9.772092910936896,
779
+ "eval_validation_scarebleu_sys_len": 19476,
780
+ "eval_validation_scarebleu_totals": [
781
+ 19476,
782
+ 18479,
783
+ 17482,
784
+ 16485
785
+ ],
786
+ "eval_validation_steps_per_second": 0.754,
787
+ "eval_validation_ter_num_edits": 13943,
788
+ "eval_validation_ter_ref_length": 16729.0,
789
+ "eval_validation_ter_score": 83.34628489449459,
790
+ "step": 10000
791
+ },
792
+ {
793
+ "epoch": 2.73,
794
+ "eval_test_chrf_beta": 2,
795
+ "eval_test_chrf_char_order": 6,
796
+ "eval_test_chrf_score": 35.254931037556965,
797
+ "eval_test_chrf_word_order": 2,
798
+ "eval_test_loss": 3.296231985092163,
799
+ "eval_test_meteor": 0.3139488805447367,
800
+ "eval_test_runtime": 23.0481,
801
+ "eval_test_samples_per_second": 43.908,
802
+ "eval_test_scarebleu_bp": 0.9980710741204407,
803
+ "eval_test_scarebleu_counts": [
804
+ 8108,
805
+ 2823,
806
+ 1184,
807
+ 513
808
+ ],
809
+ "eval_test_scarebleu_precisions": [
810
+ 40.14060101985247,
811
+ 14.713086985980091,
812
+ 6.514442916093535,
813
+ 2.9889879391714733
814
+ ],
815
+ "eval_test_scarebleu_ref_len": 20238,
816
+ "eval_test_scarebleu_score": 10.335553726245266,
817
+ "eval_test_scarebleu_sys_len": 20199,
818
+ "eval_test_scarebleu_totals": [
819
+ 20199,
820
+ 19187,
821
+ 18175,
822
+ 17163
823
+ ],
824
+ "eval_test_steps_per_second": 0.694,
825
+ "eval_test_ter_num_edits": 14363,
826
+ "eval_test_ter_ref_length": 17367.0,
827
+ "eval_test_ter_score": 82.70282720101342,
828
+ "step": 10000
829
+ },
830
+ {
831
+ "epoch": 3.01,
832
+ "learning_rate": 0.0002998874400254125,
833
+ "loss": 2.7563,
834
+ "step": 11000
835
+ },
836
+ {
837
+ "epoch": 3.01,
838
+ "eval_validation_chrf_beta": 2,
839
+ "eval_validation_chrf_char_order": 6,
840
+ "eval_validation_chrf_score": 35.922822688896666,
841
+ "eval_validation_chrf_word_order": 2,
842
+ "eval_validation_loss": 3.1191513538360596,
843
+ "eval_validation_meteor": 0.3225733936623777,
844
+ "eval_validation_runtime": 18.9973,
845
+ "eval_validation_samples_per_second": 52.481,
846
+ "eval_validation_scarebleu_bp": 0.9987114400155332,
847
+ "eval_validation_scarebleu_counts": [
848
+ 7893,
849
+ 2765,
850
+ 1154,
851
+ 506
852
+ ],
853
+ "eval_validation_scarebleu_precisions": [
854
+ 40.70864923410181,
855
+ 15.033710308829926,
856
+ 6.634090255820638,
857
+ 3.085742163678497
858
+ ],
859
+ "eval_validation_scarebleu_ref_len": 19414,
860
+ "eval_validation_scarebleu_score": 10.56607027026474,
861
+ "eval_validation_scarebleu_sys_len": 19389,
862
+ "eval_validation_scarebleu_totals": [
863
+ 19389,
864
+ 18392,
865
+ 17395,
866
+ 16398
867
+ ],
868
+ "eval_validation_steps_per_second": 0.842,
869
+ "eval_validation_ter_num_edits": 13631,
870
+ "eval_validation_ter_ref_length": 16729.0,
871
+ "eval_validation_ter_score": 81.4812600872736,
872
+ "step": 11000
873
+ },
874
+ {
875
+ "epoch": 3.01,
876
+ "eval_test_chrf_beta": 2,
877
+ "eval_test_chrf_char_order": 6,
878
+ "eval_test_chrf_score": 36.3162560210105,
879
+ "eval_test_chrf_word_order": 2,
880
+ "eval_test_loss": 3.134953737258911,
881
+ "eval_test_meteor": 0.3246997818548813,
882
+ "eval_test_runtime": 22.6178,
883
+ "eval_test_samples_per_second": 44.744,
884
+ "eval_test_scarebleu_bp": 1.0,
885
+ "eval_test_scarebleu_counts": [
886
+ 8246,
887
+ 2958,
888
+ 1263,
889
+ 563
890
+ ],
891
+ "eval_test_scarebleu_precisions": [
892
+ 40.70088845014808,
893
+ 15.36783042394015,
894
+ 6.925860934415442,
895
+ 3.2686948444031585
896
+ ],
897
+ "eval_test_scarebleu_ref_len": 20238,
898
+ "eval_test_scarebleu_score": 10.90852794539211,
899
+ "eval_test_scarebleu_sys_len": 20260,
900
+ "eval_test_scarebleu_totals": [
901
+ 20260,
902
+ 19248,
903
+ 18236,
904
+ 17224
905
+ ],
906
+ "eval_test_steps_per_second": 0.707,
907
+ "eval_test_ter_num_edits": 14265,
908
+ "eval_test_ter_ref_length": 17367.0,
909
+ "eval_test_ter_score": 82.13853860770428,
910
+ "step": 11000
911
+ },
912
+ {
913
+ "epoch": 3.28,
914
+ "learning_rate": 0.0002998610411456772,
915
+ "loss": 2.5851,
916
+ "step": 12000
917
+ },
918
+ {
919
+ "epoch": 3.28,
920
+ "eval_validation_chrf_beta": 2,
921
+ "eval_validation_chrf_char_order": 6,
922
+ "eval_validation_chrf_score": 36.76732059324029,
923
+ "eval_validation_chrf_word_order": 2,
924
+ "eval_validation_loss": 2.992377996444702,
925
+ "eval_validation_meteor": 0.3322433302283302,
926
+ "eval_validation_runtime": 19.5358,
927
+ "eval_validation_samples_per_second": 51.034,
928
+ "eval_validation_scarebleu_bp": 1.0,
929
+ "eval_validation_scarebleu_counts": [
930
+ 8059,
931
+ 2918,
932
+ 1246,
933
+ 534
934
+ ],
935
+ "eval_validation_scarebleu_precisions": [
936
+ 41.24782475176579,
937
+ 15.738093953939917,
938
+ 7.102143182854538,
939
+ 3.2269760696156635
940
+ ],
941
+ "eval_validation_scarebleu_ref_len": 19414,
942
+ "eval_validation_scarebleu_score": 11.044208276358312,
943
+ "eval_validation_scarebleu_sys_len": 19538,
944
+ "eval_validation_scarebleu_totals": [
945
+ 19538,
946
+ 18541,
947
+ 17544,
948
+ 16548
949
+ ],
950
+ "eval_validation_steps_per_second": 0.819,
951
+ "eval_validation_ter_num_edits": 13514,
952
+ "eval_validation_ter_ref_length": 16729.0,
953
+ "eval_validation_ter_score": 80.78187578456573,
954
+ "step": 12000
955
+ },
956
+ {
957
+ "epoch": 3.28,
958
+ "eval_test_chrf_beta": 2,
959
+ "eval_test_chrf_char_order": 6,
960
+ "eval_test_chrf_score": 37.26551462594263,
961
+ "eval_test_chrf_word_order": 2,
962
+ "eval_test_loss": 3.0072576999664307,
963
+ "eval_test_meteor": 0.333414935567908,
964
+ "eval_test_runtime": 23.446,
965
+ "eval_test_samples_per_second": 43.163,
966
+ "eval_test_scarebleu_bp": 1.0,
967
+ "eval_test_scarebleu_counts": [
968
+ 8465,
969
+ 3091,
970
+ 1327,
971
+ 584
972
+ ],
973
+ "eval_test_scarebleu_precisions": [
974
+ 41.44836703716398,
975
+ 15.923960640873732,
976
+ 7.212348497200935,
977
+ 3.3588313107494105
978
+ ],
979
+ "eval_test_scarebleu_ref_len": 20238,
980
+ "eval_test_scarebleu_score": 11.244906644276366,
981
+ "eval_test_scarebleu_sys_len": 20423,
982
+ "eval_test_scarebleu_totals": [
983
+ 20423,
984
+ 19411,
985
+ 18399,
986
+ 17387
987
+ ],
988
+ "eval_test_steps_per_second": 0.682,
989
+ "eval_test_ter_num_edits": 14135,
990
+ "eval_test_ter_ref_length": 17367.0,
991
+ "eval_test_ter_score": 81.38999251453907,
992
+ "step": 12000
993
+ },
994
+ {
995
+ "epoch": 3.55,
996
+ "learning_rate": 0.00029983186523894237,
997
+ "loss": 2.4949,
998
+ "step": 13000
999
+ },
1000
+ {
1001
+ "epoch": 3.55,
1002
+ "eval_validation_chrf_beta": 2,
1003
+ "eval_validation_chrf_char_order": 6,
1004
+ "eval_validation_chrf_score": 37.97775302751643,
1005
+ "eval_validation_chrf_word_order": 2,
1006
+ "eval_validation_loss": 2.9026131629943848,
1007
+ "eval_validation_meteor": 0.34218163692855424,
1008
+ "eval_validation_runtime": 18.3627,
1009
+ "eval_validation_samples_per_second": 54.295,
1010
+ "eval_validation_scarebleu_bp": 1.0,
1011
+ "eval_validation_scarebleu_counts": [
1012
+ 8292,
1013
+ 3082,
1014
+ 1350,
1015
+ 611
1016
+ ],
1017
+ "eval_validation_scarebleu_precisions": [
1018
+ 42.28886168910649,
1019
+ 16.56009886626189,
1020
+ 7.664357897127285,
1021
+ 3.6769573328518987
1022
+ ],
1023
+ "eval_validation_scarebleu_ref_len": 19414,
1024
+ "eval_validation_scarebleu_score": 11.852591640319904,
1025
+ "eval_validation_scarebleu_sys_len": 19608,
1026
+ "eval_validation_scarebleu_totals": [
1027
+ 19608,
1028
+ 18611,
1029
+ 17614,
1030
+ 16617
1031
+ ],
1032
+ "eval_validation_steps_per_second": 0.871,
1033
+ "eval_validation_ter_num_edits": 13331,
1034
+ "eval_validation_ter_ref_length": 16729.0,
1035
+ "eval_validation_ter_score": 79.68796700340725,
1036
+ "step": 13000
1037
+ },
1038
+ {
1039
+ "epoch": 3.55,
1040
+ "eval_test_chrf_beta": 2,
1041
+ "eval_test_chrf_char_order": 6,
1042
+ "eval_test_chrf_score": 38.64688755179878,
1043
+ "eval_test_chrf_word_order": 2,
1044
+ "eval_test_loss": 2.9025168418884277,
1045
+ "eval_test_meteor": 0.3488289320983426,
1046
+ "eval_test_runtime": 23.8956,
1047
+ "eval_test_samples_per_second": 42.351,
1048
+ "eval_test_scarebleu_bp": 1.0,
1049
+ "eval_test_scarebleu_counts": [
1050
+ 8783,
1051
+ 3333,
1052
+ 1490,
1053
+ 682
1054
+ ],
1055
+ "eval_test_scarebleu_precisions": [
1056
+ 42.666990527082824,
1057
+ 17.02855975067695,
1058
+ 8.027584720650827,
1059
+ 3.8862613254316485
1060
+ ],
1061
+ "eval_test_scarebleu_ref_len": 20238,
1062
+ "eval_test_scarebleu_score": 12.270060576985161,
1063
+ "eval_test_scarebleu_sys_len": 20585,
1064
+ "eval_test_scarebleu_totals": [
1065
+ 20585,
1066
+ 19573,
1067
+ 18561,
1068
+ 17549
1069
+ ],
1070
+ "eval_test_steps_per_second": 0.67,
1071
+ "eval_test_ter_num_edits": 13913,
1072
+ "eval_test_ter_ref_length": 17367.0,
1073
+ "eval_test_ter_score": 80.11170610928772,
1074
+ "step": 13000
1075
+ },
1076
+ {
1077
+ "epoch": 3.83,
1078
+ "learning_rate": 0.0002997999128458575,
1079
+ "loss": 2.4153,
1080
+ "step": 14000
1081
+ },
1082
+ {
1083
+ "epoch": 3.83,
1084
+ "eval_validation_chrf_beta": 2,
1085
+ "eval_validation_chrf_char_order": 6,
1086
+ "eval_validation_chrf_score": 38.410061864122575,
1087
+ "eval_validation_chrf_word_order": 2,
1088
+ "eval_validation_loss": 2.802281141281128,
1089
+ "eval_validation_meteor": 0.3482320817009387,
1090
+ "eval_validation_runtime": 20.1679,
1091
+ "eval_validation_samples_per_second": 49.435,
1092
+ "eval_validation_scarebleu_bp": 1.0,
1093
+ "eval_validation_scarebleu_counts": [
1094
+ 8386,
1095
+ 3169,
1096
+ 1413,
1097
+ 644
1098
+ ],
1099
+ "eval_validation_scarebleu_precisions": [
1100
+ 42.807554874936194,
1101
+ 17.044048835583283,
1102
+ 8.030234144123664,
1103
+ 3.879751792276643
1104
+ ],
1105
+ "eval_validation_scarebleu_ref_len": 19414,
1106
+ "eval_validation_scarebleu_score": 12.278811597675354,
1107
+ "eval_validation_scarebleu_sys_len": 19590,
1108
+ "eval_validation_scarebleu_totals": [
1109
+ 19590,
1110
+ 18593,
1111
+ 17596,
1112
+ 16599
1113
+ ],
1114
+ "eval_validation_steps_per_second": 0.793,
1115
+ "eval_validation_ter_num_edits": 13285,
1116
+ "eval_validation_ter_ref_length": 16729.0,
1117
+ "eval_validation_ter_score": 79.41299539721442,
1118
+ "step": 14000
1119
+ },
1120
+ {
1121
+ "epoch": 3.83,
1122
+ "eval_test_chrf_beta": 2,
1123
+ "eval_test_chrf_char_order": 6,
1124
+ "eval_test_chrf_score": 39.18817112230258,
1125
+ "eval_test_chrf_word_order": 2,
1126
+ "eval_test_loss": 2.8074586391448975,
1127
+ "eval_test_meteor": 0.35206418879072193,
1128
+ "eval_test_runtime": 22.9097,
1129
+ "eval_test_samples_per_second": 44.173,
1130
+ "eval_test_scarebleu_bp": 1.0,
1131
+ "eval_test_scarebleu_counts": [
1132
+ 8797,
1133
+ 3403,
1134
+ 1547,
1135
+ 713
1136
+ ],
1137
+ "eval_test_scarebleu_precisions": [
1138
+ 42.6273198623831,
1139
+ 17.34012738853503,
1140
+ 8.311395261376457,
1141
+ 4.0509061985114485
1142
+ ],
1143
+ "eval_test_scarebleu_ref_len": 20238,
1144
+ "eval_test_scarebleu_score": 12.560056952808758,
1145
+ "eval_test_scarebleu_sys_len": 20637,
1146
+ "eval_test_scarebleu_totals": [
1147
+ 20637,
1148
+ 19625,
1149
+ 18613,
1150
+ 17601
1151
+ ],
1152
+ "eval_test_steps_per_second": 0.698,
1153
+ "eval_test_ter_num_edits": 13882,
1154
+ "eval_test_ter_ref_length": 17367.0,
1155
+ "eval_test_ter_score": 79.93320665630218,
1156
+ "step": 14000
1157
+ },
1158
+ {
1159
+ "epoch": 4.1,
1160
+ "learning_rate": 0.000299765184558522,
1161
+ "loss": 2.331,
1162
+ "step": 15000
1163
+ },
1164
+ {
1165
+ "epoch": 4.1,
1166
+ "eval_validation_chrf_beta": 2,
1167
+ "eval_validation_chrf_char_order": 6,
1168
+ "eval_validation_chrf_score": 39.13729411872558,
1169
+ "eval_validation_chrf_word_order": 2,
1170
+ "eval_validation_loss": 2.7163193225860596,
1171
+ "eval_validation_meteor": 0.354395671848595,
1172
+ "eval_validation_runtime": 19.2961,
1173
+ "eval_validation_samples_per_second": 51.668,
1174
+ "eval_validation_scarebleu_bp": 1.0,
1175
+ "eval_validation_scarebleu_counts": [
1176
+ 8486,
1177
+ 3263,
1178
+ 1446,
1179
+ 664
1180
+ ],
1181
+ "eval_validation_scarebleu_precisions": [
1182
+ 43.388894569996936,
1183
+ 17.5798717741501,
1184
+ 8.23274880437258,
1185
+ 4.007967646526227
1186
+ ],
1187
+ "eval_validation_scarebleu_ref_len": 19414,
1188
+ "eval_validation_scarebleu_score": 12.595512408378779,
1189
+ "eval_validation_scarebleu_sys_len": 19558,
1190
+ "eval_validation_scarebleu_totals": [
1191
+ 19558,
1192
+ 18561,
1193
+ 17564,
1194
+ 16567
1195
+ ],
1196
+ "eval_validation_steps_per_second": 0.829,
1197
+ "eval_validation_ter_num_edits": 13087,
1198
+ "eval_validation_ter_ref_length": 16729.0,
1199
+ "eval_validation_ter_score": 78.22942196186263,
1200
+ "step": 15000
1201
+ },
1202
+ {
1203
+ "epoch": 4.1,
1204
+ "eval_test_chrf_beta": 2,
1205
+ "eval_test_chrf_char_order": 6,
1206
+ "eval_test_chrf_score": 39.693895702702456,
1207
+ "eval_test_chrf_word_order": 2,
1208
+ "eval_test_loss": 2.730414390563965,
1209
+ "eval_test_meteor": 0.35872327155724143,
1210
+ "eval_test_runtime": 23.3127,
1211
+ "eval_test_samples_per_second": 43.41,
1212
+ "eval_test_scarebleu_bp": 1.0,
1213
+ "eval_test_scarebleu_counts": [
1214
+ 8979,
1215
+ 3479,
1216
+ 1580,
1217
+ 745
1218
+ ],
1219
+ "eval_test_scarebleu_precisions": [
1220
+ 43.72321776392676,
1221
+ 17.819094447859044,
1222
+ 8.535004321521175,
1223
+ 4.257142857142857
1224
+ ],
1225
+ "eval_test_scarebleu_ref_len": 20238,
1226
+ "eval_test_scarebleu_score": 12.971193222930435,
1227
+ "eval_test_scarebleu_sys_len": 20536,
1228
+ "eval_test_scarebleu_totals": [
1229
+ 20536,
1230
+ 19524,
1231
+ 18512,
1232
+ 17500
1233
+ ],
1234
+ "eval_test_steps_per_second": 0.686,
1235
+ "eval_test_ter_num_edits": 13688,
1236
+ "eval_test_ter_ref_length": 17367.0,
1237
+ "eval_test_ter_score": 78.81614556342488,
1238
+ "step": 15000
1239
+ },
1240
+ {
1241
+ "epoch": 4.37,
1242
+ "learning_rate": 0.00029972768102047483,
1243
+ "loss": 2.2516,
1244
+ "step": 16000
1245
+ },
1246
+ {
1247
+ "epoch": 4.37,
1248
+ "eval_validation_chrf_beta": 2,
1249
+ "eval_validation_chrf_char_order": 6,
1250
+ "eval_validation_chrf_score": 39.53731351221887,
1251
+ "eval_validation_chrf_word_order": 2,
1252
+ "eval_validation_loss": 2.65919828414917,
1253
+ "eval_validation_meteor": 0.35875663369715244,
1254
+ "eval_validation_runtime": 18.3314,
1255
+ "eval_validation_samples_per_second": 54.387,
1256
+ "eval_validation_scarebleu_bp": 1.0,
1257
+ "eval_validation_scarebleu_counts": [
1258
+ 8559,
1259
+ 3305,
1260
+ 1499,
1261
+ 688
1262
+ ],
1263
+ "eval_validation_scarebleu_precisions": [
1264
+ 43.7442502299908,
1265
+ 17.79848133986752,
1266
+ 8.530616890507625,
1267
+ 4.150829562594269
1268
+ ],
1269
+ "eval_validation_scarebleu_ref_len": 19414,
1270
+ "eval_validation_scarebleu_score": 12.885605055056248,
1271
+ "eval_validation_scarebleu_sys_len": 19566,
1272
+ "eval_validation_scarebleu_totals": [
1273
+ 19566,
1274
+ 18569,
1275
+ 17572,
1276
+ 16575
1277
+ ],
1278
+ "eval_validation_steps_per_second": 0.873,
1279
+ "eval_validation_ter_num_edits": 13032,
1280
+ "eval_validation_ter_ref_length": 16729.0,
1281
+ "eval_validation_ter_score": 77.9006515631538,
1282
+ "step": 16000
1283
+ },
1284
+ {
1285
+ "epoch": 4.37,
1286
+ "eval_test_chrf_beta": 2,
1287
+ "eval_test_chrf_char_order": 6,
1288
+ "eval_test_chrf_score": 40.0170544194821,
1289
+ "eval_test_chrf_word_order": 2,
1290
+ "eval_test_loss": 2.6628172397613525,
1291
+ "eval_test_meteor": 0.364117400692827,
1292
+ "eval_test_runtime": 20.9827,
1293
+ "eval_test_samples_per_second": 48.23,
1294
+ "eval_test_scarebleu_bp": 1.0,
1295
+ "eval_test_scarebleu_counts": [
1296
+ 9039,
1297
+ 3538,
1298
+ 1634,
1299
+ 772
1300
+ ],
1301
+ "eval_test_scarebleu_precisions": [
1302
+ 44.11420204978038,
1303
+ 18.164082554677073,
1304
+ 8.848694898732806,
1305
+ 4.423054887131889
1306
+ ],
1307
+ "eval_test_scarebleu_ref_len": 20238,
1308
+ "eval_test_scarebleu_score": 13.307557800740135,
1309
+ "eval_test_scarebleu_sys_len": 20490,
1310
+ "eval_test_scarebleu_totals": [
1311
+ 20490,
1312
+ 19478,
1313
+ 18466,
1314
+ 17454
1315
+ ],
1316
+ "eval_test_steps_per_second": 0.763,
1317
+ "eval_test_ter_num_edits": 13615,
1318
+ "eval_test_ter_ref_length": 17367.0,
1319
+ "eval_test_ter_score": 78.39580814187828,
1320
+ "step": 16000
1321
+ },
1322
+ {
1323
+ "epoch": 4.65,
1324
+ "learning_rate": 0.00029968740292668196,
1325
+ "loss": 2.213,
1326
+ "step": 17000
1327
+ },
1328
+ {
1329
+ "epoch": 4.65,
1330
+ "eval_validation_chrf_beta": 2,
1331
+ "eval_validation_chrf_char_order": 6,
1332
+ "eval_validation_chrf_score": 40.099777569436895,
1333
+ "eval_validation_chrf_word_order": 2,
1334
+ "eval_validation_loss": 2.612889051437378,
1335
+ "eval_validation_meteor": 0.3648782076482497,
1336
+ "eval_validation_runtime": 18.3903,
1337
+ "eval_validation_samples_per_second": 54.213,
1338
+ "eval_validation_scarebleu_bp": 1.0,
1339
+ "eval_validation_scarebleu_counts": [
1340
+ 8663,
1341
+ 3436,
1342
+ 1573,
1343
+ 730
1344
+ ],
1345
+ "eval_validation_scarebleu_precisions": [
1346
+ 43.93002028397566,
1347
+ 18.351759867542594,
1348
+ 8.873970438903307,
1349
+ 4.363679837408093
1350
+ ],
1351
+ "eval_validation_scarebleu_ref_len": 19414,
1352
+ "eval_validation_scarebleu_score": 13.292372233680728,
1353
+ "eval_validation_scarebleu_sys_len": 19720,
1354
+ "eval_validation_scarebleu_totals": [
1355
+ 19720,
1356
+ 18723,
1357
+ 17726,
1358
+ 16729
1359
+ ],
1360
+ "eval_validation_steps_per_second": 0.87,
1361
+ "eval_validation_ter_num_edits": 12962,
1362
+ "eval_validation_ter_ref_length": 16729.0,
1363
+ "eval_validation_ter_score": 77.48221651025166,
1364
+ "step": 17000
1365
+ },
1366
+ {
1367
+ "epoch": 4.65,
1368
+ "eval_test_chrf_beta": 2,
1369
+ "eval_test_chrf_char_order": 6,
1370
+ "eval_test_chrf_score": 40.94861131064201,
1371
+ "eval_test_chrf_word_order": 2,
1372
+ "eval_test_loss": 2.625495195388794,
1373
+ "eval_test_meteor": 0.37167048896927946,
1374
+ "eval_test_runtime": 23.8134,
1375
+ "eval_test_samples_per_second": 42.497,
1376
+ "eval_test_scarebleu_bp": 1.0,
1377
+ "eval_test_scarebleu_counts": [
1378
+ 9193,
1379
+ 3711,
1380
+ 1745,
1381
+ 853
1382
+ ],
1383
+ "eval_test_scarebleu_precisions": [
1384
+ 44.511693216481866,
1385
+ 18.894149992362916,
1386
+ 9.367115787213484,
1387
+ 4.841914060282681
1388
+ ],
1389
+ "eval_test_scarebleu_ref_len": 20238,
1390
+ "eval_test_scarebleu_score": 13.975138151336937,
1391
+ "eval_test_scarebleu_sys_len": 20653,
1392
+ "eval_test_scarebleu_totals": [
1393
+ 20653,
1394
+ 19641,
1395
+ 18629,
1396
+ 17617
1397
+ ],
1398
+ "eval_test_steps_per_second": 0.672,
1399
+ "eval_test_ter_num_edits": 13520,
1400
+ "eval_test_ter_ref_length": 17367.0,
1401
+ "eval_test_ter_score": 77.84879368918062,
1402
+ "step": 17000
1403
+ },
1404
+ {
1405
+ "epoch": 4.92,
1406
+ "learning_rate": 0.00029964435102352384,
1407
+ "loss": 2.172,
1408
+ "step": 18000
1409
+ },
1410
+ {
1411
+ "epoch": 4.92,
1412
+ "eval_validation_chrf_beta": 2,
1413
+ "eval_validation_chrf_char_order": 6,
1414
+ "eval_validation_chrf_score": 40.57571982774164,
1415
+ "eval_validation_chrf_word_order": 2,
1416
+ "eval_validation_loss": 2.5481536388397217,
1417
+ "eval_validation_meteor": 0.3734694399709547,
1418
+ "eval_validation_runtime": 19.2312,
1419
+ "eval_validation_samples_per_second": 51.843,
1420
+ "eval_validation_scarebleu_bp": 1.0,
1421
+ "eval_validation_scarebleu_counts": [
1422
+ 8738,
1423
+ 3503,
1424
+ 1625,
1425
+ 771
1426
+ ],
1427
+ "eval_validation_scarebleu_precisions": [
1428
+ 44.85165794066317,
1429
+ 18.950500405734378,
1430
+ 9.292086001829826,
1431
+ 4.675277424049481
1432
+ ],
1433
+ "eval_validation_scarebleu_ref_len": 19414,
1434
+ "eval_validation_scarebleu_score": 13.862129372426606,
1435
+ "eval_validation_scarebleu_sys_len": 19482,
1436
+ "eval_validation_scarebleu_totals": [
1437
+ 19482,
1438
+ 18485,
1439
+ 17488,
1440
+ 16491
1441
+ ],
1442
+ "eval_validation_steps_per_second": 0.832,
1443
+ "eval_validation_ter_num_edits": 12785,
1444
+ "eval_validation_ter_ref_length": 16729.0,
1445
+ "eval_validation_ter_score": 76.42417359077052,
1446
+ "step": 18000
1447
+ },
1448
+ {
1449
+ "epoch": 4.92,
1450
+ "eval_test_chrf_beta": 2,
1451
+ "eval_test_chrf_char_order": 6,
1452
+ "eval_test_chrf_score": 41.3542278861197,
1453
+ "eval_test_chrf_word_order": 2,
1454
+ "eval_test_loss": 2.5593228340148926,
1455
+ "eval_test_meteor": 0.3781022025701054,
1456
+ "eval_test_runtime": 21.5553,
1457
+ "eval_test_samples_per_second": 46.949,
1458
+ "eval_test_scarebleu_bp": 1.0,
1459
+ "eval_test_scarebleu_counts": [
1460
+ 9240,
1461
+ 3803,
1462
+ 1804,
1463
+ 862
1464
+ ],
1465
+ "eval_test_scarebleu_precisions": [
1466
+ 45.42772861356932,
1467
+ 19.676117549668874,
1468
+ 9.84931207687268,
1469
+ 4.981507165973185
1470
+ ],
1471
+ "eval_test_scarebleu_ref_len": 20238,
1472
+ "eval_test_scarebleu_score": 14.471274188368865,
1473
+ "eval_test_scarebleu_sys_len": 20340,
1474
+ "eval_test_scarebleu_totals": [
1475
+ 20340,
1476
+ 19328,
1477
+ 18316,
1478
+ 17304
1479
+ ],
1480
+ "eval_test_steps_per_second": 0.742,
1481
+ "eval_test_ter_num_edits": 13336,
1482
+ "eval_test_ter_ref_length": 17367.0,
1483
+ "eval_test_ter_score": 76.78931306500834,
1484
+ "step": 18000
1485
+ },
1486
+ {
1487
+ "epoch": 5.19,
1488
+ "learning_rate": 0.0002995985261087815,
1489
+ "loss": 2.1011,
1490
+ "step": 19000
1491
+ },
1492
+ {
1493
+ "epoch": 5.19,
1494
+ "eval_validation_chrf_beta": 2,
1495
+ "eval_validation_chrf_char_order": 6,
1496
+ "eval_validation_chrf_score": 40.91683022147655,
1497
+ "eval_validation_chrf_word_order": 2,
1498
+ "eval_validation_loss": 2.509413957595825,
1499
+ "eval_validation_meteor": 0.3741422146344069,
1500
+ "eval_validation_runtime": 19.6815,
1501
+ "eval_validation_samples_per_second": 50.657,
1502
+ "eval_validation_scarebleu_bp": 1.0,
1503
+ "eval_validation_scarebleu_counts": [
1504
+ 8797,
1505
+ 3539,
1506
+ 1630,
1507
+ 777
1508
+ ],
1509
+ "eval_validation_scarebleu_precisions": [
1510
+ 44.709290506200446,
1511
+ 18.94641040740939,
1512
+ 9.218414206537721,
1513
+ 4.656877434821696
1514
+ ],
1515
+ "eval_validation_scarebleu_ref_len": 19414,
1516
+ "eval_validation_scarebleu_score": 13.809213245335025,
1517
+ "eval_validation_scarebleu_sys_len": 19676,
1518
+ "eval_validation_scarebleu_totals": [
1519
+ 19676,
1520
+ 18679,
1521
+ 17682,
1522
+ 16685
1523
+ ],
1524
+ "eval_validation_steps_per_second": 0.813,
1525
+ "eval_validation_ter_num_edits": 12842,
1526
+ "eval_validation_ter_ref_length": 16729.0,
1527
+ "eval_validation_ter_score": 76.76489927670512,
1528
+ "step": 19000
1529
+ },
1530
+ {
1531
+ "epoch": 5.19,
1532
+ "eval_test_chrf_beta": 2,
1533
+ "eval_test_chrf_char_order": 6,
1534
+ "eval_test_chrf_score": 41.61549355265803,
1535
+ "eval_test_chrf_word_order": 2,
1536
+ "eval_test_loss": 2.518157482147217,
1537
+ "eval_test_meteor": 0.3797284608780533,
1538
+ "eval_test_runtime": 19.8347,
1539
+ "eval_test_samples_per_second": 51.022,
1540
+ "eval_test_scarebleu_bp": 1.0,
1541
+ "eval_test_scarebleu_counts": [
1542
+ 9292,
1543
+ 3832,
1544
+ 1832,
1545
+ 898
1546
+ ],
1547
+ "eval_test_scarebleu_precisions": [
1548
+ 45.2297507788162,
1549
+ 19.61908662707352,
1550
+ 9.892008639308855,
1551
+ 5.129083847384053
1552
+ ],
1553
+ "eval_test_scarebleu_ref_len": 20238,
1554
+ "eval_test_scarebleu_score": 14.566553770170765,
1555
+ "eval_test_scarebleu_sys_len": 20544,
1556
+ "eval_test_scarebleu_totals": [
1557
+ 20544,
1558
+ 19532,
1559
+ 18520,
1560
+ 17508
1561
+ ],
1562
+ "eval_test_steps_per_second": 0.807,
1563
+ "eval_test_ter_num_edits": 13324,
1564
+ "eval_test_ter_ref_length": 17367.0,
1565
+ "eval_test_ter_score": 76.72021650256234,
1566
+ "step": 19000
1567
+ },
1568
+ {
1569
+ "epoch": 5.47,
1570
+ "learning_rate": 0.0002995499290316219,
1571
+ "loss": 2.0657,
1572
+ "step": 20000
1573
+ },
1574
+ {
1575
+ "epoch": 5.47,
1576
+ "eval_validation_chrf_beta": 2,
1577
+ "eval_validation_chrf_char_order": 6,
1578
+ "eval_validation_chrf_score": 41.094856625774376,
1579
+ "eval_validation_chrf_word_order": 2,
1580
+ "eval_validation_loss": 2.466625452041626,
1581
+ "eval_validation_meteor": 0.378875561890472,
1582
+ "eval_validation_runtime": 18.4931,
1583
+ "eval_validation_samples_per_second": 53.912,
1584
+ "eval_validation_scarebleu_bp": 1.0,
1585
+ "eval_validation_scarebleu_counts": [
1586
+ 8851,
1587
+ 3584,
1588
+ 1652,
1589
+ 766
1590
+ ],
1591
+ "eval_validation_scarebleu_precisions": [
1592
+ 44.70653601373876,
1593
+ 19.062815807669804,
1594
+ 9.278813749719165,
1595
+ 4.557624799190814
1596
+ ],
1597
+ "eval_validation_scarebleu_ref_len": 19414,
1598
+ "eval_validation_scarebleu_score": 13.778351975109262,
1599
+ "eval_validation_scarebleu_sys_len": 19798,
1600
+ "eval_validation_scarebleu_totals": [
1601
+ 19798,
1602
+ 18801,
1603
+ 17804,
1604
+ 16807
1605
+ ],
1606
+ "eval_validation_steps_per_second": 0.865,
1607
+ "eval_validation_ter_num_edits": 12843,
1608
+ "eval_validation_ter_ref_length": 16729.0,
1609
+ "eval_validation_ter_score": 76.77087692031802,
1610
+ "step": 20000
1611
+ },
1612
+ {
1613
+ "epoch": 5.47,
1614
+ "eval_test_chrf_beta": 2,
1615
+ "eval_test_chrf_char_order": 6,
1616
+ "eval_test_chrf_score": 42.01510354031067,
1617
+ "eval_test_chrf_word_order": 2,
1618
+ "eval_test_loss": 2.4802513122558594,
1619
+ "eval_test_meteor": 0.3865124149670556,
1620
+ "eval_test_runtime": 21.1754,
1621
+ "eval_test_samples_per_second": 47.791,
1622
+ "eval_test_scarebleu_bp": 1.0,
1623
+ "eval_test_scarebleu_counts": [
1624
+ 9418,
1625
+ 3919,
1626
+ 1907,
1627
+ 937
1628
+ ],
1629
+ "eval_test_scarebleu_precisions": [
1630
+ 45.62542389303362,
1631
+ 19.964340295466123,
1632
+ 10.242775808357504,
1633
+ 5.322049301374531
1634
+ ],
1635
+ "eval_test_scarebleu_ref_len": 20238,
1636
+ "eval_test_scarebleu_score": 14.927586937806508,
1637
+ "eval_test_scarebleu_sys_len": 20642,
1638
+ "eval_test_scarebleu_totals": [
1639
+ 20642,
1640
+ 19630,
1641
+ 18618,
1642
+ 17606
1643
+ ],
1644
+ "eval_test_steps_per_second": 0.756,
1645
+ "eval_test_ter_num_edits": 13273,
1646
+ "eval_test_ter_ref_length": 17367.0,
1647
+ "eval_test_ter_score": 76.42655611216675,
1648
+ "step": 20000
1649
+ },
1650
+ {
1651
+ "epoch": 5.74,
1652
+ "learning_rate": 0.00029949856069258176,
1653
+ "loss": 2.042,
1654
+ "step": 21000
1655
+ },
1656
+ {
1657
+ "epoch": 5.74,
1658
+ "eval_validation_chrf_beta": 2,
1659
+ "eval_validation_chrf_char_order": 6,
1660
+ "eval_validation_chrf_score": 41.49472756887711,
1661
+ "eval_validation_chrf_word_order": 2,
1662
+ "eval_validation_loss": 2.428771734237671,
1663
+ "eval_validation_meteor": 0.3799987462633123,
1664
+ "eval_validation_runtime": 19.422,
1665
+ "eval_validation_samples_per_second": 51.333,
1666
+ "eval_validation_scarebleu_bp": 1.0,
1667
+ "eval_validation_scarebleu_counts": [
1668
+ 8875,
1669
+ 3637,
1670
+ 1710,
1671
+ 824
1672
+ ],
1673
+ "eval_validation_scarebleu_precisions": [
1674
+ 45.40570960810396,
1675
+ 19.607526012183946,
1676
+ 9.742479489516864,
1677
+ 4.9773482331621866
1678
+ ],
1679
+ "eval_validation_scarebleu_ref_len": 19414,
1680
+ "eval_validation_scarebleu_score": 14.41452044509331,
1681
+ "eval_validation_scarebleu_sys_len": 19546,
1682
+ "eval_validation_scarebleu_totals": [
1683
+ 19546,
1684
+ 18549,
1685
+ 17552,
1686
+ 16555
1687
+ ],
1688
+ "eval_validation_steps_per_second": 0.824,
1689
+ "eval_validation_ter_num_edits": 12677,
1690
+ "eval_validation_ter_ref_length": 16729.0,
1691
+ "eval_validation_ter_score": 75.77858808057863,
1692
+ "step": 21000
1693
+ },
1694
+ {
1695
+ "epoch": 5.74,
1696
+ "eval_test_chrf_beta": 2,
1697
+ "eval_test_chrf_char_order": 6,
1698
+ "eval_test_chrf_score": 42.150596446775204,
1699
+ "eval_test_chrf_word_order": 2,
1700
+ "eval_test_loss": 2.435882091522217,
1701
+ "eval_test_meteor": 0.3867430131575975,
1702
+ "eval_test_runtime": 21.5564,
1703
+ "eval_test_samples_per_second": 46.947,
1704
+ "eval_test_scarebleu_bp": 1.0,
1705
+ "eval_test_scarebleu_counts": [
1706
+ 9407,
1707
+ 3916,
1708
+ 1868,
1709
+ 903
1710
+ ],
1711
+ "eval_test_scarebleu_precisions": [
1712
+ 45.89899975603806,
1713
+ 20.099573987578914,
1714
+ 10.113150343782145,
1715
+ 5.172117532504726
1716
+ ],
1717
+ "eval_test_scarebleu_ref_len": 20238,
1718
+ "eval_test_scarebleu_score": 14.821297704758008,
1719
+ "eval_test_scarebleu_sys_len": 20495,
1720
+ "eval_test_scarebleu_totals": [
1721
+ 20495,
1722
+ 19483,
1723
+ 18471,
1724
+ 17459
1725
+ ],
1726
+ "eval_test_steps_per_second": 0.742,
1727
+ "eval_test_ter_num_edits": 13178,
1728
+ "eval_test_ter_ref_length": 17367.0,
1729
+ "eval_test_ter_score": 75.87954165946911,
1730
+ "step": 21000
1731
+ }
1732
+ ],
1733
+ "max_steps": 731800,
1734
+ "num_train_epochs": 200,
1735
+ "total_flos": 1.4694529965249004e+18,
1736
+ "trial_name": null,
1737
+ "trial_params": null
1738
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d993f7ffbd6981369c4ac5f4a1b13c7a1af645c6c0726aa348bdbf89ddaaf4b3
3
+ size 3835