LRJ1981 committed on
Commit
94750da
1 Parent(s): 22030af

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ---
3
+ tags:
4
+ - autotrain
5
+ - text2text-generation
6
+ widget:
7
+ - text: "I love AutoTrain"
8
+ datasets:
9
+ - autotrain-ve993-lub6e/autotrain-data
10
+ ---
11
+
12
+ # Model Trained Using AutoTrain
13
+
14
+ - Problem type: Seq2Seq
15
+
16
+ ## Validation Metrics
17
+ loss: 1.5230909585952759
18
+
19
+ rouge1: 55.7716
20
+
21
+ rouge2: 33.0852
22
+
23
+ rougeL: 51.3404
24
+
25
+ rougeLsum: 51.4618
26
+
27
+ gen_len: 59.7293
28
+
29
+ runtime: 430.9744
30
+
31
+ samples_per_second: 3.875
32
+
33
+ steps_per_second: 0.195
34
+
35
+ epoch: 9.0
checkpoint-4676/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-da-en",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "swish",
6
+ "add_bias_logits": false,
7
+ "add_final_layer_norm": false,
8
+ "architectures": [
9
+ "MarianMTModel"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bad_words_ids": [
13
+ [
14
+ 58929
15
+ ]
16
+ ],
17
+ "bos_token_id": 0,
18
+ "classif_dropout": 0.0,
19
+ "classifier_dropout": 0.0,
20
+ "d_model": 512,
21
+ "decoder_attention_heads": 8,
22
+ "decoder_ffn_dim": 2048,
23
+ "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 6,
25
+ "decoder_start_token_id": 58929,
26
+ "decoder_vocab_size": 58930,
27
+ "dropout": 0.1,
28
+ "encoder_attention_heads": 8,
29
+ "encoder_ffn_dim": 2048,
30
+ "encoder_layerdrop": 0.0,
31
+ "encoder_layers": 6,
32
+ "eos_token_id": 0,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 4,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 58929,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.40.1",
59
+ "use_cache": false,
60
+ "vocab_size": 58930
61
+ }
checkpoint-4676/generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 58929
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 58929,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 58929,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.40.1"
16
+ }
checkpoint-4676/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd59557e95ee48f4799a183b95004f37eeddb04d1f4125cf6d859fc60cedd5cd
3
+ size 297507400
checkpoint-4676/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69b583438c2a94795110470ffcdf0647bfc6d50f82c9d0d80f54ade77810a38c
3
+ size 594696826
checkpoint-4676/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d145668ab859cd1763e2dc7454c2ea3a12250310343ad9a384f87cbdbc54f09
3
+ size 14244
checkpoint-4676/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08dd588e72eb2629380818fe636ae342302694515239edc923e495c1e8ad34cd
3
+ size 1064
checkpoint-4676/source.spm ADDED
Binary file (820 kB). View file
 
checkpoint-4676/special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
checkpoint-4676/target.spm ADDED
Binary file (788 kB). View file
 
checkpoint-4676/tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "58929": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "separate_vocabs": false,
33
+ "source_lang": "da",
34
+ "sp_model_kwargs": {},
35
+ "target_lang": "en",
36
+ "tokenizer_class": "MarianTokenizer",
37
+ "unk_token": "<unk>"
38
+ }
checkpoint-4676/trainer_state.json ADDED
@@ -0,0 +1,1421 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.5230909585952759,
3
+ "best_model_checkpoint": "autotrain-ve993-lub6e/checkpoint-4676",
4
+ "epoch": 7.0,
5
+ "eval_steps": 500,
6
+ "global_step": 4676,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0374251497005988,
13
+ "grad_norm": 9.912540435791016,
14
+ "learning_rate": 1.7964071856287426e-06,
15
+ "loss": 5.9541,
16
+ "step": 25
17
+ },
18
+ {
19
+ "epoch": 0.0748502994011976,
20
+ "grad_norm": 8.57716178894043,
21
+ "learning_rate": 3.6676646706586825e-06,
22
+ "loss": 5.6517,
23
+ "step": 50
24
+ },
25
+ {
26
+ "epoch": 0.1122754491017964,
27
+ "grad_norm": 6.1179351806640625,
28
+ "learning_rate": 5.538922155688623e-06,
29
+ "loss": 5.3189,
30
+ "step": 75
31
+ },
32
+ {
33
+ "epoch": 0.1497005988023952,
34
+ "grad_norm": 6.469350337982178,
35
+ "learning_rate": 7.410179640718563e-06,
36
+ "loss": 4.9823,
37
+ "step": 100
38
+ },
39
+ {
40
+ "epoch": 0.18712574850299402,
41
+ "grad_norm": 6.627992153167725,
42
+ "learning_rate": 9.281437125748502e-06,
43
+ "loss": 4.704,
44
+ "step": 125
45
+ },
46
+ {
47
+ "epoch": 0.2245508982035928,
48
+ "grad_norm": 5.041338920593262,
49
+ "learning_rate": 1.1152694610778444e-05,
50
+ "loss": 4.4379,
51
+ "step": 150
52
+ },
53
+ {
54
+ "epoch": 0.2619760479041916,
55
+ "grad_norm": 6.7107977867126465,
56
+ "learning_rate": 1.3023952095808384e-05,
57
+ "loss": 4.2558,
58
+ "step": 175
59
+ },
60
+ {
61
+ "epoch": 0.2994011976047904,
62
+ "grad_norm": 6.578221797943115,
63
+ "learning_rate": 1.4895209580838324e-05,
64
+ "loss": 4.068,
65
+ "step": 200
66
+ },
67
+ {
68
+ "epoch": 0.33682634730538924,
69
+ "grad_norm": 6.045902252197266,
70
+ "learning_rate": 1.6766467065868263e-05,
71
+ "loss": 3.9557,
72
+ "step": 225
73
+ },
74
+ {
75
+ "epoch": 0.37425149700598803,
76
+ "grad_norm": 5.811893463134766,
77
+ "learning_rate": 1.8637724550898206e-05,
78
+ "loss": 3.8036,
79
+ "step": 250
80
+ },
81
+ {
82
+ "epoch": 0.4116766467065868,
83
+ "grad_norm": 5.545330047607422,
84
+ "learning_rate": 2.0508982035928146e-05,
85
+ "loss": 3.7448,
86
+ "step": 275
87
+ },
88
+ {
89
+ "epoch": 0.4491017964071856,
90
+ "grad_norm": 5.683607578277588,
91
+ "learning_rate": 2.2380239520958086e-05,
92
+ "loss": 3.587,
93
+ "step": 300
94
+ },
95
+ {
96
+ "epoch": 0.4865269461077844,
97
+ "grad_norm": 6.531998157501221,
98
+ "learning_rate": 2.4251497005988023e-05,
99
+ "loss": 3.5058,
100
+ "step": 325
101
+ },
102
+ {
103
+ "epoch": 0.5239520958083832,
104
+ "grad_norm": 5.926412105560303,
105
+ "learning_rate": 2.6122754491017963e-05,
106
+ "loss": 3.3958,
107
+ "step": 350
108
+ },
109
+ {
110
+ "epoch": 0.561377245508982,
111
+ "grad_norm": 5.820963382720947,
112
+ "learning_rate": 2.7994011976047907e-05,
113
+ "loss": 3.3647,
114
+ "step": 375
115
+ },
116
+ {
117
+ "epoch": 0.5988023952095808,
118
+ "grad_norm": 6.341830253601074,
119
+ "learning_rate": 2.9865269461077843e-05,
120
+ "loss": 3.2207,
121
+ "step": 400
122
+ },
123
+ {
124
+ "epoch": 0.6362275449101796,
125
+ "grad_norm": 5.764517784118652,
126
+ "learning_rate": 3.1736526946107784e-05,
127
+ "loss": 3.2476,
128
+ "step": 425
129
+ },
130
+ {
131
+ "epoch": 0.6736526946107785,
132
+ "grad_norm": 5.8953962326049805,
133
+ "learning_rate": 3.360778443113773e-05,
134
+ "loss": 3.1193,
135
+ "step": 450
136
+ },
137
+ {
138
+ "epoch": 0.7110778443113772,
139
+ "grad_norm": 5.671535968780518,
140
+ "learning_rate": 3.5479041916167664e-05,
141
+ "loss": 3.0553,
142
+ "step": 475
143
+ },
144
+ {
145
+ "epoch": 0.7485029940119761,
146
+ "grad_norm": 5.246084690093994,
147
+ "learning_rate": 3.735029940119761e-05,
148
+ "loss": 2.9964,
149
+ "step": 500
150
+ },
151
+ {
152
+ "epoch": 0.7859281437125748,
153
+ "grad_norm": 6.070059299468994,
154
+ "learning_rate": 3.9221556886227544e-05,
155
+ "loss": 2.9678,
156
+ "step": 525
157
+ },
158
+ {
159
+ "epoch": 0.8233532934131736,
160
+ "grad_norm": 5.288054943084717,
161
+ "learning_rate": 4.109281437125749e-05,
162
+ "loss": 2.9111,
163
+ "step": 550
164
+ },
165
+ {
166
+ "epoch": 0.8607784431137725,
167
+ "grad_norm": 6.651124000549316,
168
+ "learning_rate": 4.2964071856287424e-05,
169
+ "loss": 2.878,
170
+ "step": 575
171
+ },
172
+ {
173
+ "epoch": 0.8982035928143712,
174
+ "grad_norm": 5.776132106781006,
175
+ "learning_rate": 4.483532934131737e-05,
176
+ "loss": 2.8239,
177
+ "step": 600
178
+ },
179
+ {
180
+ "epoch": 0.9356287425149701,
181
+ "grad_norm": 5.356322288513184,
182
+ "learning_rate": 4.670658682634731e-05,
183
+ "loss": 2.7097,
184
+ "step": 625
185
+ },
186
+ {
187
+ "epoch": 0.9730538922155688,
188
+ "grad_norm": 5.361959457397461,
189
+ "learning_rate": 4.857784431137725e-05,
190
+ "loss": 2.7526,
191
+ "step": 650
192
+ },
193
+ {
194
+ "epoch": 1.0,
195
+ "eval_gen_len": 60.8838,
196
+ "eval_loss": 2.430297613143921,
197
+ "eval_rouge1": 43.4684,
198
+ "eval_rouge2": 19.1354,
199
+ "eval_rougeL": 38.9925,
200
+ "eval_rougeLsum": 39.1478,
201
+ "eval_runtime": 483.1059,
202
+ "eval_samples_per_second": 3.457,
203
+ "eval_steps_per_second": 0.174,
204
+ "step": 668
205
+ },
206
+ {
207
+ "epoch": 1.0104790419161676,
208
+ "grad_norm": 6.461863040924072,
209
+ "learning_rate": 4.99500998003992e-05,
210
+ "loss": 2.6027,
211
+ "step": 675
212
+ },
213
+ {
214
+ "epoch": 1.0479041916167664,
215
+ "grad_norm": 4.890101432800293,
216
+ "learning_rate": 4.9742182302062544e-05,
217
+ "loss": 2.489,
218
+ "step": 700
219
+ },
220
+ {
221
+ "epoch": 1.0853293413173652,
222
+ "grad_norm": 5.590378761291504,
223
+ "learning_rate": 4.953426480372588e-05,
224
+ "loss": 2.4426,
225
+ "step": 725
226
+ },
227
+ {
228
+ "epoch": 1.122754491017964,
229
+ "grad_norm": 5.469313144683838,
230
+ "learning_rate": 4.932634730538922e-05,
231
+ "loss": 2.393,
232
+ "step": 750
233
+ },
234
+ {
235
+ "epoch": 1.160179640718563,
236
+ "grad_norm": 5.43513822555542,
237
+ "learning_rate": 4.9118429807052565e-05,
238
+ "loss": 2.4025,
239
+ "step": 775
240
+ },
241
+ {
242
+ "epoch": 1.1976047904191618,
243
+ "grad_norm": 5.54775333404541,
244
+ "learning_rate": 4.891051230871591e-05,
245
+ "loss": 2.3976,
246
+ "step": 800
247
+ },
248
+ {
249
+ "epoch": 1.2350299401197604,
250
+ "grad_norm": 4.897847652435303,
251
+ "learning_rate": 4.8702594810379244e-05,
252
+ "loss": 2.3977,
253
+ "step": 825
254
+ },
255
+ {
256
+ "epoch": 1.2724550898203593,
257
+ "grad_norm": 5.192740440368652,
258
+ "learning_rate": 4.8494677312042586e-05,
259
+ "loss": 2.2916,
260
+ "step": 850
261
+ },
262
+ {
263
+ "epoch": 1.3098802395209581,
264
+ "grad_norm": 5.499965190887451,
265
+ "learning_rate": 4.828675981370592e-05,
266
+ "loss": 2.3031,
267
+ "step": 875
268
+ },
269
+ {
270
+ "epoch": 1.347305389221557,
271
+ "grad_norm": 4.9002580642700195,
272
+ "learning_rate": 4.8078842315369265e-05,
273
+ "loss": 2.2754,
274
+ "step": 900
275
+ },
276
+ {
277
+ "epoch": 1.3847305389221556,
278
+ "grad_norm": 4.921010494232178,
279
+ "learning_rate": 4.78709248170326e-05,
280
+ "loss": 2.2357,
281
+ "step": 925
282
+ },
283
+ {
284
+ "epoch": 1.4221556886227544,
285
+ "grad_norm": 5.515709400177002,
286
+ "learning_rate": 4.766300731869594e-05,
287
+ "loss": 2.3363,
288
+ "step": 950
289
+ },
290
+ {
291
+ "epoch": 1.4595808383233533,
292
+ "grad_norm": 5.200982093811035,
293
+ "learning_rate": 4.7455089820359286e-05,
294
+ "loss": 2.2036,
295
+ "step": 975
296
+ },
297
+ {
298
+ "epoch": 1.4970059880239521,
299
+ "grad_norm": 5.73722505569458,
300
+ "learning_rate": 4.724717232202263e-05,
301
+ "loss": 2.2285,
302
+ "step": 1000
303
+ },
304
+ {
305
+ "epoch": 1.534431137724551,
306
+ "grad_norm": 5.761080265045166,
307
+ "learning_rate": 4.7039254823685964e-05,
308
+ "loss": 2.1971,
309
+ "step": 1025
310
+ },
311
+ {
312
+ "epoch": 1.5718562874251498,
313
+ "grad_norm": 4.7960524559021,
314
+ "learning_rate": 4.683133732534931e-05,
315
+ "loss": 2.1514,
316
+ "step": 1050
317
+ },
318
+ {
319
+ "epoch": 1.6092814371257484,
320
+ "grad_norm": 5.1422810554504395,
321
+ "learning_rate": 4.662341982701264e-05,
322
+ "loss": 2.1301,
323
+ "step": 1075
324
+ },
325
+ {
326
+ "epoch": 1.6467065868263473,
327
+ "grad_norm": 5.624262809753418,
328
+ "learning_rate": 4.6415502328675985e-05,
329
+ "loss": 2.0802,
330
+ "step": 1100
331
+ },
332
+ {
333
+ "epoch": 1.6841317365269461,
334
+ "grad_norm": 4.837282657623291,
335
+ "learning_rate": 4.620758483033932e-05,
336
+ "loss": 2.1048,
337
+ "step": 1125
338
+ },
339
+ {
340
+ "epoch": 1.7215568862275448,
341
+ "grad_norm": 5.103292465209961,
342
+ "learning_rate": 4.5999667332002664e-05,
343
+ "loss": 1.9987,
344
+ "step": 1150
345
+ },
346
+ {
347
+ "epoch": 1.7589820359281436,
348
+ "grad_norm": 5.245999336242676,
349
+ "learning_rate": 4.5791749833666006e-05,
350
+ "loss": 2.01,
351
+ "step": 1175
352
+ },
353
+ {
354
+ "epoch": 1.7964071856287425,
355
+ "grad_norm": 4.83555269241333,
356
+ "learning_rate": 4.558383233532935e-05,
357
+ "loss": 2.0216,
358
+ "step": 1200
359
+ },
360
+ {
361
+ "epoch": 1.8338323353293413,
362
+ "grad_norm": 4.849733352661133,
363
+ "learning_rate": 4.5375914836992685e-05,
364
+ "loss": 2.0782,
365
+ "step": 1225
366
+ },
367
+ {
368
+ "epoch": 1.8712574850299402,
369
+ "grad_norm": 5.461581707000732,
370
+ "learning_rate": 4.516799733865603e-05,
371
+ "loss": 2.0555,
372
+ "step": 1250
373
+ },
374
+ {
375
+ "epoch": 1.908682634730539,
376
+ "grad_norm": 5.006551742553711,
377
+ "learning_rate": 4.496007984031936e-05,
378
+ "loss": 2.0485,
379
+ "step": 1275
380
+ },
381
+ {
382
+ "epoch": 1.9461077844311379,
383
+ "grad_norm": 4.840539455413818,
384
+ "learning_rate": 4.47521623419827e-05,
385
+ "loss": 1.9808,
386
+ "step": 1300
387
+ },
388
+ {
389
+ "epoch": 1.9835329341317365,
390
+ "grad_norm": 4.997801303863525,
391
+ "learning_rate": 4.454424484364604e-05,
392
+ "loss": 1.9118,
393
+ "step": 1325
394
+ },
395
+ {
396
+ "epoch": 2.0,
397
+ "eval_gen_len": 59.1407,
398
+ "eval_loss": 1.8773729801177979,
399
+ "eval_rouge1": 50.6374,
400
+ "eval_rouge2": 26.6607,
401
+ "eval_rougeL": 46.1237,
402
+ "eval_rougeLsum": 46.2222,
403
+ "eval_runtime": 439.7943,
404
+ "eval_samples_per_second": 3.797,
405
+ "eval_steps_per_second": 0.191,
406
+ "step": 1336
407
+ },
408
+ {
409
+ "epoch": 2.020958083832335,
410
+ "grad_norm": 4.751077651977539,
411
+ "learning_rate": 4.433632734530938e-05,
412
+ "loss": 1.8838,
413
+ "step": 1350
414
+ },
415
+ {
416
+ "epoch": 2.058383233532934,
417
+ "grad_norm": 5.055469036102295,
418
+ "learning_rate": 4.412840984697272e-05,
419
+ "loss": 1.7158,
420
+ "step": 1375
421
+ },
422
+ {
423
+ "epoch": 2.095808383233533,
424
+ "grad_norm": 4.67478609085083,
425
+ "learning_rate": 4.392049234863606e-05,
426
+ "loss": 1.7627,
427
+ "step": 1400
428
+ },
429
+ {
430
+ "epoch": 2.1332335329341316,
431
+ "grad_norm": 5.1834306716918945,
432
+ "learning_rate": 4.3712574850299406e-05,
433
+ "loss": 1.8174,
434
+ "step": 1425
435
+ },
436
+ {
437
+ "epoch": 2.1706586826347305,
438
+ "grad_norm": 4.199576377868652,
439
+ "learning_rate": 4.350465735196274e-05,
440
+ "loss": 1.7619,
441
+ "step": 1450
442
+ },
443
+ {
444
+ "epoch": 2.2080838323353293,
445
+ "grad_norm": 4.928585529327393,
446
+ "learning_rate": 4.3296739853626084e-05,
447
+ "loss": 1.7676,
448
+ "step": 1475
449
+ },
450
+ {
451
+ "epoch": 2.245508982035928,
452
+ "grad_norm": 4.089141368865967,
453
+ "learning_rate": 4.308882235528942e-05,
454
+ "loss": 1.7588,
455
+ "step": 1500
456
+ },
457
+ {
458
+ "epoch": 2.282934131736527,
459
+ "grad_norm": 5.2093706130981445,
460
+ "learning_rate": 4.288090485695276e-05,
461
+ "loss": 1.7744,
462
+ "step": 1525
463
+ },
464
+ {
465
+ "epoch": 2.320359281437126,
466
+ "grad_norm": 4.752145767211914,
467
+ "learning_rate": 4.26729873586161e-05,
468
+ "loss": 1.7208,
469
+ "step": 1550
470
+ },
471
+ {
472
+ "epoch": 2.3577844311377247,
473
+ "grad_norm": 4.885648727416992,
474
+ "learning_rate": 4.246506986027944e-05,
475
+ "loss": 1.7038,
476
+ "step": 1575
477
+ },
478
+ {
479
+ "epoch": 2.3952095808383236,
480
+ "grad_norm": 5.004818439483643,
481
+ "learning_rate": 4.2257152361942784e-05,
482
+ "loss": 1.7129,
483
+ "step": 1600
484
+ },
485
+ {
486
+ "epoch": 2.432634730538922,
487
+ "grad_norm": 4.234861373901367,
488
+ "learning_rate": 4.2049234863606126e-05,
489
+ "loss": 1.6684,
490
+ "step": 1625
491
+ },
492
+ {
493
+ "epoch": 2.470059880239521,
494
+ "grad_norm": 4.313364028930664,
495
+ "learning_rate": 4.184131736526946e-05,
496
+ "loss": 1.6709,
497
+ "step": 1650
498
+ },
499
+ {
500
+ "epoch": 2.5074850299401197,
501
+ "grad_norm": 5.006948471069336,
502
+ "learning_rate": 4.1633399866932805e-05,
503
+ "loss": 1.7118,
504
+ "step": 1675
505
+ },
506
+ {
507
+ "epoch": 2.5449101796407185,
508
+ "grad_norm": 5.49020528793335,
509
+ "learning_rate": 4.142548236859614e-05,
510
+ "loss": 1.7034,
511
+ "step": 1700
512
+ },
513
+ {
514
+ "epoch": 2.5823353293413174,
515
+ "grad_norm": 4.611372470855713,
516
+ "learning_rate": 4.121756487025948e-05,
517
+ "loss": 1.6309,
518
+ "step": 1725
519
+ },
520
+ {
521
+ "epoch": 2.6197604790419162,
522
+ "grad_norm": 4.116524696350098,
523
+ "learning_rate": 4.100964737192282e-05,
524
+ "loss": 1.654,
525
+ "step": 1750
526
+ },
527
+ {
528
+ "epoch": 2.657185628742515,
529
+ "grad_norm": 4.763690948486328,
530
+ "learning_rate": 4.080172987358616e-05,
531
+ "loss": 1.6983,
532
+ "step": 1775
533
+ },
534
+ {
535
+ "epoch": 2.694610778443114,
536
+ "grad_norm": 3.969961404800415,
537
+ "learning_rate": 4.0593812375249504e-05,
538
+ "loss": 1.666,
539
+ "step": 1800
540
+ },
541
+ {
542
+ "epoch": 2.7320359281437128,
543
+ "grad_norm": 4.265604496002197,
544
+ "learning_rate": 4.038589487691285e-05,
545
+ "loss": 1.7378,
546
+ "step": 1825
547
+ },
548
+ {
549
+ "epoch": 2.769461077844311,
550
+ "grad_norm": 4.415422439575195,
551
+ "learning_rate": 4.017797737857618e-05,
552
+ "loss": 1.6482,
553
+ "step": 1850
554
+ },
555
+ {
556
+ "epoch": 2.80688622754491,
557
+ "grad_norm": 4.685695648193359,
558
+ "learning_rate": 3.9970059880239525e-05,
559
+ "loss": 1.6576,
560
+ "step": 1875
561
+ },
562
+ {
563
+ "epoch": 2.844311377245509,
564
+ "grad_norm": 4.4989399909973145,
565
+ "learning_rate": 3.976214238190286e-05,
566
+ "loss": 1.6885,
567
+ "step": 1900
568
+ },
569
+ {
570
+ "epoch": 2.8817365269461077,
571
+ "grad_norm": 4.608761310577393,
572
+ "learning_rate": 3.9554224883566204e-05,
573
+ "loss": 1.6823,
574
+ "step": 1925
575
+ },
576
+ {
577
+ "epoch": 2.9191616766467066,
578
+ "grad_norm": 4.4332475662231445,
579
+ "learning_rate": 3.934630738522954e-05,
580
+ "loss": 1.6424,
581
+ "step": 1950
582
+ },
583
+ {
584
+ "epoch": 2.9565868263473054,
585
+ "grad_norm": 4.602639198303223,
586
+ "learning_rate": 3.913838988689288e-05,
587
+ "loss": 1.6947,
588
+ "step": 1975
589
+ },
590
+ {
591
+ "epoch": 2.9940119760479043,
592
+ "grad_norm": 4.250889301300049,
593
+ "learning_rate": 3.8930472388556225e-05,
594
+ "loss": 1.6076,
595
+ "step": 2000
596
+ },
597
+ {
598
+ "epoch": 3.0,
599
+ "eval_gen_len": 59.3683,
600
+ "eval_loss": 1.6866850852966309,
601
+ "eval_rouge1": 53.1998,
602
+ "eval_rouge2": 29.7919,
603
+ "eval_rougeL": 48.8395,
604
+ "eval_rougeLsum": 48.9549,
605
+ "eval_runtime": 382.1142,
606
+ "eval_samples_per_second": 4.37,
607
+ "eval_steps_per_second": 0.22,
608
+ "step": 2004
609
+ },
610
+ {
611
+ "epoch": 3.031437125748503,
612
+ "grad_norm": 5.463351249694824,
613
+ "learning_rate": 3.872255489021957e-05,
614
+ "loss": 1.4482,
615
+ "step": 2025
616
+ },
617
+ {
618
+ "epoch": 3.068862275449102,
619
+ "grad_norm": 4.865541458129883,
620
+ "learning_rate": 3.8514637391882903e-05,
621
+ "loss": 1.4829,
622
+ "step": 2050
623
+ },
624
+ {
625
+ "epoch": 3.106287425149701,
626
+ "grad_norm": 4.7900190353393555,
627
+ "learning_rate": 3.8306719893546246e-05,
628
+ "loss": 1.3993,
629
+ "step": 2075
630
+ },
631
+ {
632
+ "epoch": 3.143712574850299,
633
+ "grad_norm": 4.584388732910156,
634
+ "learning_rate": 3.809880239520958e-05,
635
+ "loss": 1.4461,
636
+ "step": 2100
637
+ },
638
+ {
639
+ "epoch": 3.181137724550898,
640
+ "grad_norm": 4.259522438049316,
641
+ "learning_rate": 3.7890884896872925e-05,
642
+ "loss": 1.4739,
643
+ "step": 2125
644
+ },
645
+ {
646
+ "epoch": 3.218562874251497,
647
+ "grad_norm": 4.623165130615234,
648
+ "learning_rate": 3.768296739853626e-05,
649
+ "loss": 1.4349,
650
+ "step": 2150
651
+ },
652
+ {
653
+ "epoch": 3.2559880239520957,
654
+ "grad_norm": 4.376612186431885,
655
+ "learning_rate": 3.74750499001996e-05,
656
+ "loss": 1.3954,
657
+ "step": 2175
658
+ },
659
+ {
660
+ "epoch": 3.2934131736526946,
661
+ "grad_norm": 5.000776290893555,
662
+ "learning_rate": 3.726713240186294e-05,
663
+ "loss": 1.4457,
664
+ "step": 2200
665
+ },
666
+ {
667
+ "epoch": 3.3308383233532934,
668
+ "grad_norm": 4.057362079620361,
669
+ "learning_rate": 3.705921490352628e-05,
670
+ "loss": 1.4247,
671
+ "step": 2225
672
+ },
673
+ {
674
+ "epoch": 3.3682634730538923,
675
+ "grad_norm": 5.192569732666016,
676
+ "learning_rate": 3.6851297405189624e-05,
677
+ "loss": 1.4742,
678
+ "step": 2250
679
+ },
680
+ {
681
+ "epoch": 3.405688622754491,
682
+ "grad_norm": 4.622374534606934,
683
+ "learning_rate": 3.664337990685297e-05,
684
+ "loss": 1.4153,
685
+ "step": 2275
686
+ },
687
+ {
688
+ "epoch": 3.44311377245509,
689
+ "grad_norm": 4.387070655822754,
690
+ "learning_rate": 3.64354624085163e-05,
691
+ "loss": 1.3851,
692
+ "step": 2300
693
+ },
694
+ {
695
+ "epoch": 3.480538922155689,
696
+ "grad_norm": 4.561577796936035,
697
+ "learning_rate": 3.6227544910179645e-05,
698
+ "loss": 1.4805,
699
+ "step": 2325
700
+ },
701
+ {
702
+ "epoch": 3.5179640718562872,
703
+ "grad_norm": 4.737853527069092,
704
+ "learning_rate": 3.601962741184298e-05,
705
+ "loss": 1.4177,
706
+ "step": 2350
707
+ },
708
+ {
709
+ "epoch": 3.555389221556886,
710
+ "grad_norm": 3.9790077209472656,
711
+ "learning_rate": 3.5811709913506324e-05,
712
+ "loss": 1.436,
713
+ "step": 2375
714
+ },
715
+ {
716
+ "epoch": 3.592814371257485,
717
+ "grad_norm": 4.334903717041016,
718
+ "learning_rate": 3.560379241516966e-05,
719
+ "loss": 1.4621,
720
+ "step": 2400
721
+ },
722
+ {
723
+ "epoch": 3.6302395209580838,
724
+ "grad_norm": 5.124073505401611,
725
+ "learning_rate": 3.5395874916833e-05,
726
+ "loss": 1.446,
727
+ "step": 2425
728
+ },
729
+ {
730
+ "epoch": 3.6676646706586826,
731
+ "grad_norm": 4.298630237579346,
732
+ "learning_rate": 3.5187957418496345e-05,
733
+ "loss": 1.4052,
734
+ "step": 2450
735
+ },
736
+ {
737
+ "epoch": 3.7050898203592815,
738
+ "grad_norm": 4.598848819732666,
739
+ "learning_rate": 3.498003992015968e-05,
740
+ "loss": 1.4273,
741
+ "step": 2475
742
+ },
743
+ {
744
+ "epoch": 3.7425149700598803,
745
+ "grad_norm": 4.356764793395996,
746
+ "learning_rate": 3.477212242182302e-05,
747
+ "loss": 1.4811,
748
+ "step": 2500
749
+ },
750
+ {
751
+ "epoch": 3.779940119760479,
752
+ "grad_norm": 5.15587854385376,
753
+ "learning_rate": 3.456420492348636e-05,
754
+ "loss": 1.4153,
755
+ "step": 2525
756
+ },
757
+ {
758
+ "epoch": 3.817365269461078,
759
+ "grad_norm": 5.045792579650879,
760
+ "learning_rate": 3.43562874251497e-05,
761
+ "loss": 1.4457,
762
+ "step": 2550
763
+ },
764
+ {
765
+ "epoch": 3.8547904191616764,
766
+ "grad_norm": 4.461826801300049,
767
+ "learning_rate": 3.414836992681304e-05,
768
+ "loss": 1.4368,
769
+ "step": 2575
770
+ },
771
+ {
772
+ "epoch": 3.8922155688622757,
773
+ "grad_norm": 5.3425493240356445,
774
+ "learning_rate": 3.394045242847638e-05,
775
+ "loss": 1.4049,
776
+ "step": 2600
777
+ },
778
+ {
779
+ "epoch": 3.929640718562874,
780
+ "grad_norm": 3.766526699066162,
781
+ "learning_rate": 3.373253493013972e-05,
782
+ "loss": 1.4038,
783
+ "step": 2625
784
+ },
785
+ {
786
+ "epoch": 3.967065868263473,
787
+ "grad_norm": 4.911787033081055,
788
+ "learning_rate": 3.3524617431803065e-05,
789
+ "loss": 1.4198,
790
+ "step": 2650
791
+ },
792
+ {
793
+ "epoch": 4.0,
794
+ "eval_gen_len": 59.4353,
795
+ "eval_loss": 1.6032490730285645,
796
+ "eval_rouge1": 54.5779,
797
+ "eval_rouge2": 31.4538,
798
+ "eval_rougeL": 50.0841,
799
+ "eval_rougeLsum": 50.2011,
800
+ "eval_runtime": 399.7762,
801
+ "eval_samples_per_second": 4.177,
802
+ "eval_steps_per_second": 0.21,
803
+ "step": 2672
804
+ },
805
+ {
806
+ "epoch": 4.004491017964072,
807
+ "grad_norm": 3.8055388927459717,
808
+ "learning_rate": 3.33166999334664e-05,
809
+ "loss": 1.4083,
810
+ "step": 2675
811
+ },
812
+ {
813
+ "epoch": 4.04191616766467,
814
+ "grad_norm": 4.550738334655762,
815
+ "learning_rate": 3.3108782435129744e-05,
816
+ "loss": 1.2661,
817
+ "step": 2700
818
+ },
819
+ {
820
+ "epoch": 4.0793413173652695,
821
+ "grad_norm": 4.272867202758789,
822
+ "learning_rate": 3.290086493679308e-05,
823
+ "loss": 1.2666,
824
+ "step": 2725
825
+ },
826
+ {
827
+ "epoch": 4.116766467065868,
828
+ "grad_norm": 4.6178975105285645,
829
+ "learning_rate": 3.269294743845642e-05,
830
+ "loss": 1.2705,
831
+ "step": 2750
832
+ },
833
+ {
834
+ "epoch": 4.154191616766467,
835
+ "grad_norm": 4.523929595947266,
836
+ "learning_rate": 3.248502994011976e-05,
837
+ "loss": 1.2674,
838
+ "step": 2775
839
+ },
840
+ {
841
+ "epoch": 4.191616766467066,
842
+ "grad_norm": 4.166606426239014,
843
+ "learning_rate": 3.22771124417831e-05,
844
+ "loss": 1.2285,
845
+ "step": 2800
846
+ },
847
+ {
848
+ "epoch": 4.229041916167665,
849
+ "grad_norm": 4.123347759246826,
850
+ "learning_rate": 3.206919494344644e-05,
851
+ "loss": 1.2656,
852
+ "step": 2825
853
+ },
854
+ {
855
+ "epoch": 4.266467065868263,
856
+ "grad_norm": 4.342975616455078,
857
+ "learning_rate": 3.1861277445109786e-05,
858
+ "loss": 1.2482,
859
+ "step": 2850
860
+ },
861
+ {
862
+ "epoch": 4.303892215568863,
863
+ "grad_norm": 5.345706462860107,
864
+ "learning_rate": 3.165335994677312e-05,
865
+ "loss": 1.253,
866
+ "step": 2875
867
+ },
868
+ {
869
+ "epoch": 4.341317365269461,
870
+ "grad_norm": 4.096311092376709,
871
+ "learning_rate": 3.1445442448436465e-05,
872
+ "loss": 1.2157,
873
+ "step": 2900
874
+ },
875
+ {
876
+ "epoch": 4.37874251497006,
877
+ "grad_norm": 4.385800361633301,
878
+ "learning_rate": 3.12375249500998e-05,
879
+ "loss": 1.2742,
880
+ "step": 2925
881
+ },
882
+ {
883
+ "epoch": 4.416167664670659,
884
+ "grad_norm": 4.278284549713135,
885
+ "learning_rate": 3.102960745176314e-05,
886
+ "loss": 1.2775,
887
+ "step": 2950
888
+ },
889
+ {
890
+ "epoch": 4.453592814371257,
891
+ "grad_norm": 4.5966057777404785,
892
+ "learning_rate": 3.082168995342648e-05,
893
+ "loss": 1.2118,
894
+ "step": 2975
895
+ },
896
+ {
897
+ "epoch": 4.491017964071856,
898
+ "grad_norm": 5.148831367492676,
899
+ "learning_rate": 3.061377245508982e-05,
900
+ "loss": 1.2548,
901
+ "step": 3000
902
+ },
903
+ {
904
+ "epoch": 4.528443113772455,
905
+ "grad_norm": 4.798081398010254,
906
+ "learning_rate": 3.040585495675316e-05,
907
+ "loss": 1.241,
908
+ "step": 3025
909
+ },
910
+ {
911
+ "epoch": 4.565868263473054,
912
+ "grad_norm": 4.575997829437256,
913
+ "learning_rate": 3.0197937458416503e-05,
914
+ "loss": 1.2761,
915
+ "step": 3050
916
+ },
917
+ {
918
+ "epoch": 4.6032934131736525,
919
+ "grad_norm": 4.259399890899658,
920
+ "learning_rate": 2.999001996007984e-05,
921
+ "loss": 1.2687,
922
+ "step": 3075
923
+ },
924
+ {
925
+ "epoch": 4.640718562874252,
926
+ "grad_norm": 4.092325210571289,
927
+ "learning_rate": 2.9782102461743182e-05,
928
+ "loss": 1.2834,
929
+ "step": 3100
930
+ },
931
+ {
932
+ "epoch": 4.67814371257485,
933
+ "grad_norm": 4.5153021812438965,
934
+ "learning_rate": 2.957418496340652e-05,
935
+ "loss": 1.2335,
936
+ "step": 3125
937
+ },
938
+ {
939
+ "epoch": 4.7155688622754495,
940
+ "grad_norm": 4.831052780151367,
941
+ "learning_rate": 2.9366267465069864e-05,
942
+ "loss": 1.2693,
943
+ "step": 3150
944
+ },
945
+ {
946
+ "epoch": 4.752994011976048,
947
+ "grad_norm": 4.156602382659912,
948
+ "learning_rate": 2.91583499667332e-05,
949
+ "loss": 1.2492,
950
+ "step": 3175
951
+ },
952
+ {
953
+ "epoch": 4.790419161676647,
954
+ "grad_norm": 4.299253940582275,
955
+ "learning_rate": 2.8950432468396542e-05,
956
+ "loss": 1.2583,
957
+ "step": 3200
958
+ },
959
+ {
960
+ "epoch": 4.827844311377246,
961
+ "grad_norm": 4.564883232116699,
962
+ "learning_rate": 2.874251497005988e-05,
963
+ "loss": 1.2478,
964
+ "step": 3225
965
+ },
966
+ {
967
+ "epoch": 4.865269461077844,
968
+ "grad_norm": 4.6624250411987305,
969
+ "learning_rate": 2.8534597471723224e-05,
970
+ "loss": 1.2777,
971
+ "step": 3250
972
+ },
973
+ {
974
+ "epoch": 4.902694610778443,
975
+ "grad_norm": 4.570215702056885,
976
+ "learning_rate": 2.832667997338656e-05,
977
+ "loss": 1.2541,
978
+ "step": 3275
979
+ },
980
+ {
981
+ "epoch": 4.940119760479042,
982
+ "grad_norm": 4.049204349517822,
983
+ "learning_rate": 2.8118762475049902e-05,
984
+ "loss": 1.2474,
985
+ "step": 3300
986
+ },
987
+ {
988
+ "epoch": 4.977544910179641,
989
+ "grad_norm": 3.723095178604126,
990
+ "learning_rate": 2.7910844976713242e-05,
991
+ "loss": 1.1911,
992
+ "step": 3325
993
+ },
994
+ {
995
+ "epoch": 5.0,
996
+ "eval_gen_len": 58.9928,
997
+ "eval_loss": 1.555981993675232,
998
+ "eval_rouge1": 55.2733,
999
+ "eval_rouge2": 32.3347,
1000
+ "eval_rougeL": 51.0279,
1001
+ "eval_rougeLsum": 51.1368,
1002
+ "eval_runtime": 420.4345,
1003
+ "eval_samples_per_second": 3.972,
1004
+ "eval_steps_per_second": 0.2,
1005
+ "step": 3340
1006
+ },
1007
+ {
1008
+ "epoch": 5.014970059880239,
1009
+ "grad_norm": 4.139627933502197,
1010
+ "learning_rate": 2.7702927478376584e-05,
1011
+ "loss": 1.1937,
1012
+ "step": 3350
1013
+ },
1014
+ {
1015
+ "epoch": 5.052395209580839,
1016
+ "grad_norm": 4.160578727722168,
1017
+ "learning_rate": 2.749500998003992e-05,
1018
+ "loss": 1.0797,
1019
+ "step": 3375
1020
+ },
1021
+ {
1022
+ "epoch": 5.089820359281437,
1023
+ "grad_norm": 4.00601863861084,
1024
+ "learning_rate": 2.7287092481703263e-05,
1025
+ "loss": 1.1202,
1026
+ "step": 3400
1027
+ },
1028
+ {
1029
+ "epoch": 5.127245508982036,
1030
+ "grad_norm": 4.496889114379883,
1031
+ "learning_rate": 2.7079174983366602e-05,
1032
+ "loss": 1.1162,
1033
+ "step": 3425
1034
+ },
1035
+ {
1036
+ "epoch": 5.164670658682635,
1037
+ "grad_norm": 5.572657585144043,
1038
+ "learning_rate": 2.6871257485029945e-05,
1039
+ "loss": 1.0752,
1040
+ "step": 3450
1041
+ },
1042
+ {
1043
+ "epoch": 5.202095808383233,
1044
+ "grad_norm": 4.205285549163818,
1045
+ "learning_rate": 2.666333998669328e-05,
1046
+ "loss": 1.1619,
1047
+ "step": 3475
1048
+ },
1049
+ {
1050
+ "epoch": 5.2395209580838324,
1051
+ "grad_norm": 4.050065517425537,
1052
+ "learning_rate": 2.6455422488356623e-05,
1053
+ "loss": 1.1142,
1054
+ "step": 3500
1055
+ },
1056
+ {
1057
+ "epoch": 5.276946107784431,
1058
+ "grad_norm": 4.341710567474365,
1059
+ "learning_rate": 2.6247504990019962e-05,
1060
+ "loss": 1.1131,
1061
+ "step": 3525
1062
+ },
1063
+ {
1064
+ "epoch": 5.31437125748503,
1065
+ "grad_norm": 4.592411994934082,
1066
+ "learning_rate": 2.6039587491683305e-05,
1067
+ "loss": 1.1312,
1068
+ "step": 3550
1069
+ },
1070
+ {
1071
+ "epoch": 5.3517964071856285,
1072
+ "grad_norm": 4.056641101837158,
1073
+ "learning_rate": 2.583166999334664e-05,
1074
+ "loss": 1.1562,
1075
+ "step": 3575
1076
+ },
1077
+ {
1078
+ "epoch": 5.389221556886228,
1079
+ "grad_norm": 4.492640018463135,
1080
+ "learning_rate": 2.5623752495009983e-05,
1081
+ "loss": 1.1004,
1082
+ "step": 3600
1083
+ },
1084
+ {
1085
+ "epoch": 5.426646706586826,
1086
+ "grad_norm": 4.112987995147705,
1087
+ "learning_rate": 2.5415834996673323e-05,
1088
+ "loss": 1.1045,
1089
+ "step": 3625
1090
+ },
1091
+ {
1092
+ "epoch": 5.4640718562874255,
1093
+ "grad_norm": 4.25972843170166,
1094
+ "learning_rate": 2.5207917498336665e-05,
1095
+ "loss": 1.1705,
1096
+ "step": 3650
1097
+ },
1098
+ {
1099
+ "epoch": 5.501497005988024,
1100
+ "grad_norm": 4.702874660491943,
1101
+ "learning_rate": 2.5e-05,
1102
+ "loss": 1.1236,
1103
+ "step": 3675
1104
+ },
1105
+ {
1106
+ "epoch": 5.538922155688622,
1107
+ "grad_norm": 4.413760662078857,
1108
+ "learning_rate": 2.479208250166334e-05,
1109
+ "loss": 1.0773,
1110
+ "step": 3700
1111
+ },
1112
+ {
1113
+ "epoch": 5.576347305389222,
1114
+ "grad_norm": 4.19527530670166,
1115
+ "learning_rate": 2.458416500332668e-05,
1116
+ "loss": 1.1463,
1117
+ "step": 3725
1118
+ },
1119
+ {
1120
+ "epoch": 5.61377245508982,
1121
+ "grad_norm": 4.174712657928467,
1122
+ "learning_rate": 2.4376247504990022e-05,
1123
+ "loss": 1.1354,
1124
+ "step": 3750
1125
+ },
1126
+ {
1127
+ "epoch": 5.651197604790419,
1128
+ "grad_norm": 4.140392780303955,
1129
+ "learning_rate": 2.416833000665336e-05,
1130
+ "loss": 1.1154,
1131
+ "step": 3775
1132
+ },
1133
+ {
1134
+ "epoch": 5.688622754491018,
1135
+ "grad_norm": 6.118780612945557,
1136
+ "learning_rate": 2.39604125083167e-05,
1137
+ "loss": 1.1698,
1138
+ "step": 3800
1139
+ },
1140
+ {
1141
+ "epoch": 5.726047904191617,
1142
+ "grad_norm": 4.042623043060303,
1143
+ "learning_rate": 2.375249500998004e-05,
1144
+ "loss": 1.0963,
1145
+ "step": 3825
1146
+ },
1147
+ {
1148
+ "epoch": 5.763473053892215,
1149
+ "grad_norm": 4.63490629196167,
1150
+ "learning_rate": 2.3544577511643383e-05,
1151
+ "loss": 1.0947,
1152
+ "step": 3850
1153
+ },
1154
+ {
1155
+ "epoch": 5.800898203592815,
1156
+ "grad_norm": 4.218607425689697,
1157
+ "learning_rate": 2.3336660013306722e-05,
1158
+ "loss": 1.0745,
1159
+ "step": 3875
1160
+ },
1161
+ {
1162
+ "epoch": 5.838323353293413,
1163
+ "grad_norm": 4.426632404327393,
1164
+ "learning_rate": 2.312874251497006e-05,
1165
+ "loss": 1.1408,
1166
+ "step": 3900
1167
+ },
1168
+ {
1169
+ "epoch": 5.875748502994012,
1170
+ "grad_norm": 4.3070478439331055,
1171
+ "learning_rate": 2.29208250166334e-05,
1172
+ "loss": 1.0654,
1173
+ "step": 3925
1174
+ },
1175
+ {
1176
+ "epoch": 5.913173652694611,
1177
+ "grad_norm": 4.342469215393066,
1178
+ "learning_rate": 2.2712907518296743e-05,
1179
+ "loss": 1.148,
1180
+ "step": 3950
1181
+ },
1182
+ {
1183
+ "epoch": 5.950598802395209,
1184
+ "grad_norm": 4.6176862716674805,
1185
+ "learning_rate": 2.2504990019960082e-05,
1186
+ "loss": 1.1233,
1187
+ "step": 3975
1188
+ },
1189
+ {
1190
+ "epoch": 5.9880239520958085,
1191
+ "grad_norm": 4.570769309997559,
1192
+ "learning_rate": 2.229707252162342e-05,
1193
+ "loss": 1.1279,
1194
+ "step": 4000
1195
+ },
1196
+ {
1197
+ "epoch": 6.0,
1198
+ "eval_gen_len": 59.3168,
1199
+ "eval_loss": 1.5299330949783325,
1200
+ "eval_rouge1": 55.6399,
1201
+ "eval_rouge2": 32.8373,
1202
+ "eval_rougeL": 51.2672,
1203
+ "eval_rougeLsum": 51.362,
1204
+ "eval_runtime": 429.7929,
1205
+ "eval_samples_per_second": 3.886,
1206
+ "eval_steps_per_second": 0.195,
1207
+ "step": 4008
1208
+ },
1209
+ {
1210
+ "epoch": 6.025449101796407,
1211
+ "grad_norm": 3.6286568641662598,
1212
+ "learning_rate": 2.208915502328676e-05,
1213
+ "loss": 1.0893,
1214
+ "step": 4025
1215
+ },
1216
+ {
1217
+ "epoch": 6.062874251497006,
1218
+ "grad_norm": 3.461707830429077,
1219
+ "learning_rate": 2.1881237524950103e-05,
1220
+ "loss": 0.9844,
1221
+ "step": 4050
1222
+ },
1223
+ {
1224
+ "epoch": 6.100299401197605,
1225
+ "grad_norm": 4.06862735748291,
1226
+ "learning_rate": 2.1673320026613443e-05,
1227
+ "loss": 1.0244,
1228
+ "step": 4075
1229
+ },
1230
+ {
1231
+ "epoch": 6.137724550898204,
1232
+ "grad_norm": 4.019289016723633,
1233
+ "learning_rate": 2.1465402528276782e-05,
1234
+ "loss": 0.9887,
1235
+ "step": 4100
1236
+ },
1237
+ {
1238
+ "epoch": 6.175149700598802,
1239
+ "grad_norm": 3.8128530979156494,
1240
+ "learning_rate": 2.125748502994012e-05,
1241
+ "loss": 1.0166,
1242
+ "step": 4125
1243
+ },
1244
+ {
1245
+ "epoch": 6.212574850299402,
1246
+ "grad_norm": 4.389101982116699,
1247
+ "learning_rate": 2.104956753160346e-05,
1248
+ "loss": 1.0273,
1249
+ "step": 4150
1250
+ },
1251
+ {
1252
+ "epoch": 6.25,
1253
+ "grad_norm": 4.000133037567139,
1254
+ "learning_rate": 2.0841650033266803e-05,
1255
+ "loss": 0.9862,
1256
+ "step": 4175
1257
+ },
1258
+ {
1259
+ "epoch": 6.287425149700598,
1260
+ "grad_norm": 4.630964756011963,
1261
+ "learning_rate": 2.0633732534930142e-05,
1262
+ "loss": 1.0145,
1263
+ "step": 4200
1264
+ },
1265
+ {
1266
+ "epoch": 6.324850299401198,
1267
+ "grad_norm": 5.184226036071777,
1268
+ "learning_rate": 2.042581503659348e-05,
1269
+ "loss": 1.0428,
1270
+ "step": 4225
1271
+ },
1272
+ {
1273
+ "epoch": 6.362275449101796,
1274
+ "grad_norm": 3.8193106651306152,
1275
+ "learning_rate": 2.021789753825682e-05,
1276
+ "loss": 1.0174,
1277
+ "step": 4250
1278
+ },
1279
+ {
1280
+ "epoch": 6.399700598802395,
1281
+ "grad_norm": 3.856017827987671,
1282
+ "learning_rate": 2.0009980039920163e-05,
1283
+ "loss": 0.9793,
1284
+ "step": 4275
1285
+ },
1286
+ {
1287
+ "epoch": 6.437125748502994,
1288
+ "grad_norm": 4.4376220703125,
1289
+ "learning_rate": 1.98020625415835e-05,
1290
+ "loss": 1.0351,
1291
+ "step": 4300
1292
+ },
1293
+ {
1294
+ "epoch": 6.474550898203593,
1295
+ "grad_norm": 4.181103229522705,
1296
+ "learning_rate": 1.9594145043246838e-05,
1297
+ "loss": 0.9967,
1298
+ "step": 4325
1299
+ },
1300
+ {
1301
+ "epoch": 6.5119760479041915,
1302
+ "grad_norm": 4.993010520935059,
1303
+ "learning_rate": 1.938622754491018e-05,
1304
+ "loss": 0.9976,
1305
+ "step": 4350
1306
+ },
1307
+ {
1308
+ "epoch": 6.549401197604791,
1309
+ "grad_norm": 4.017760753631592,
1310
+ "learning_rate": 1.917831004657352e-05,
1311
+ "loss": 0.9715,
1312
+ "step": 4375
1313
+ },
1314
+ {
1315
+ "epoch": 6.586826347305389,
1316
+ "grad_norm": 4.2860002517700195,
1317
+ "learning_rate": 1.897039254823686e-05,
1318
+ "loss": 1.0256,
1319
+ "step": 4400
1320
+ },
1321
+ {
1322
+ "epoch": 6.624251497005988,
1323
+ "grad_norm": 4.793003082275391,
1324
+ "learning_rate": 1.87624750499002e-05,
1325
+ "loss": 1.025,
1326
+ "step": 4425
1327
+ },
1328
+ {
1329
+ "epoch": 6.661676646706587,
1330
+ "grad_norm": NaN,
1331
+ "learning_rate": 1.8562874251497005e-05,
1332
+ "loss": 1.0235,
1333
+ "step": 4450
1334
+ },
1335
+ {
1336
+ "epoch": 6.699101796407185,
1337
+ "grad_norm": 3.581146717071533,
1338
+ "learning_rate": 1.8354956753160347e-05,
1339
+ "loss": 0.9873,
1340
+ "step": 4475
1341
+ },
1342
+ {
1343
+ "epoch": 6.736526946107785,
1344
+ "grad_norm": 4.64194393157959,
1345
+ "learning_rate": 1.8147039254823687e-05,
1346
+ "loss": 1.023,
1347
+ "step": 4500
1348
+ },
1349
+ {
1350
+ "epoch": 6.773952095808383,
1351
+ "grad_norm": 4.145544052124023,
1352
+ "learning_rate": 1.7939121756487026e-05,
1353
+ "loss": 1.041,
1354
+ "step": 4525
1355
+ },
1356
+ {
1357
+ "epoch": 6.811377245508982,
1358
+ "grad_norm": 3.821073055267334,
1359
+ "learning_rate": 1.7731204258150365e-05,
1360
+ "loss": 1.0401,
1361
+ "step": 4550
1362
+ },
1363
+ {
1364
+ "epoch": 6.848802395209581,
1365
+ "grad_norm": 5.059972286224365,
1366
+ "learning_rate": 1.7523286759813708e-05,
1367
+ "loss": 1.0292,
1368
+ "step": 4575
1369
+ },
1370
+ {
1371
+ "epoch": 6.88622754491018,
1372
+ "grad_norm": 4.337078094482422,
1373
+ "learning_rate": 1.7315369261477047e-05,
1374
+ "loss": 1.0954,
1375
+ "step": 4600
1376
+ },
1377
+ {
1378
+ "epoch": 6.923652694610778,
1379
+ "grad_norm": 4.142930507659912,
1380
+ "learning_rate": 1.7107451763140386e-05,
1381
+ "loss": 1.0082,
1382
+ "step": 4625
1383
+ },
1384
+ {
1385
+ "epoch": 6.961077844311378,
1386
+ "grad_norm": 4.153197765350342,
1387
+ "learning_rate": 1.6899534264803725e-05,
1388
+ "loss": 1.0026,
1389
+ "step": 4650
1390
+ },
1391
+ {
1392
+ "epoch": 6.998502994011976,
1393
+ "grad_norm": 4.053616523742676,
1394
+ "learning_rate": 1.6691616766467068e-05,
1395
+ "loss": 1.0112,
1396
+ "step": 4675
1397
+ },
1398
+ {
1399
+ "epoch": 7.0,
1400
+ "eval_gen_len": 59.7293,
1401
+ "eval_loss": 1.5230909585952759,
1402
+ "eval_rouge1": 55.7716,
1403
+ "eval_rouge2": 33.0852,
1404
+ "eval_rougeL": 51.3404,
1405
+ "eval_rougeLsum": 51.4618,
1406
+ "eval_runtime": 376.4887,
1407
+ "eval_samples_per_second": 4.436,
1408
+ "eval_steps_per_second": 0.223,
1409
+ "step": 4676
1410
+ }
1411
+ ],
1412
+ "logging_steps": 25,
1413
+ "max_steps": 6680,
1414
+ "num_input_tokens_seen": 0,
1415
+ "num_train_epochs": 10,
1416
+ "save_steps": 500,
1417
+ "total_flos": 749389564477440.0,
1418
+ "train_batch_size": 10,
1419
+ "trial_name": null,
1420
+ "trial_params": null
1421
+ }
checkpoint-4676/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2938b4ba280bc0635867dead56a341625ab639267b9813724099ada202129ed4
3
+ size 5176
checkpoint-4676/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "Helsinki-NLP/opus-mt-da-en",
3
+ "_num_labels": 3,
4
+ "activation_dropout": 0.0,
5
+ "activation_function": "swish",
6
+ "add_bias_logits": false,
7
+ "add_final_layer_norm": false,
8
+ "architectures": [
9
+ "MarianMTModel"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bad_words_ids": [
13
+ [
14
+ 58929
15
+ ]
16
+ ],
17
+ "bos_token_id": 0,
18
+ "classif_dropout": 0.0,
19
+ "classifier_dropout": 0.0,
20
+ "d_model": 512,
21
+ "decoder_attention_heads": 8,
22
+ "decoder_ffn_dim": 2048,
23
+ "decoder_layerdrop": 0.0,
24
+ "decoder_layers": 6,
25
+ "decoder_start_token_id": 58929,
26
+ "decoder_vocab_size": 58930,
27
+ "dropout": 0.1,
28
+ "encoder_attention_heads": 8,
29
+ "encoder_ffn_dim": 2048,
30
+ "encoder_layerdrop": 0.0,
31
+ "encoder_layers": 6,
32
+ "eos_token_id": 0,
33
+ "forced_eos_token_id": 0,
34
+ "id2label": {
35
+ "0": "LABEL_0",
36
+ "1": "LABEL_1",
37
+ "2": "LABEL_2"
38
+ },
39
+ "init_std": 0.02,
40
+ "is_encoder_decoder": true,
41
+ "label2id": {
42
+ "LABEL_0": 0,
43
+ "LABEL_1": 1,
44
+ "LABEL_2": 2
45
+ },
46
+ "max_length": 512,
47
+ "max_position_embeddings": 512,
48
+ "model_type": "marian",
49
+ "normalize_before": false,
50
+ "normalize_embedding": false,
51
+ "num_beams": 4,
52
+ "num_hidden_layers": 6,
53
+ "pad_token_id": 58929,
54
+ "scale_embedding": true,
55
+ "share_encoder_decoder_embeddings": true,
56
+ "static_position_embeddings": true,
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.40.1",
59
+ "use_cache": true,
60
+ "vocab_size": 58930
61
+ }
generation_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bad_words_ids": [
3
+ [
4
+ 58929
5
+ ]
6
+ ],
7
+ "bos_token_id": 0,
8
+ "decoder_start_token_id": 58929,
9
+ "eos_token_id": 0,
10
+ "forced_eos_token_id": 0,
11
+ "max_length": 512,
12
+ "num_beams": 4,
13
+ "pad_token_id": 58929,
14
+ "renormalize_logits": true,
15
+ "transformers_version": "4.40.1"
16
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd59557e95ee48f4799a183b95004f37eeddb04d1f4125cf6d859fc60cedd5cd
3
+ size 297507400
runs/May09_12-56-42_r-lrj1981-ml-test-2or0gyz7-dece2-sq4de/events.out.tfevents.1715259405.r-lrj1981-ml-test-2or0gyz7-dece2-sq4de.148.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37f6e3cb6e8e7af22ce983d8b1772b88b708806b3cb28a020dde88374ccb5328
3
- size 54717
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06ce56e0e751fb923dd23e976becdfb405f22e8292d1a926a36db9241ca64cfa
3
+ size 61293
runs/May09_12-56-42_r-lrj1981-ml-test-2or0gyz7-dece2-sq4de/events.out.tfevents.1715264442.r-lrj1981-ml-test-2or0gyz7-dece2-sq4de.148.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47135a7d9a0bdb5fd5f3c4cd92208de0a05b5921b4411294d93c5062f7aa80a9
3
+ size 613
source.spm ADDED
Binary file (820 kB). View file
 
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "</s>",
3
+ "pad_token": "<pad>",
4
+ "unk_token": "<unk>"
5
+ }
target.spm ADDED
Binary file (788 kB). View file
 
tokenizer_config.json ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "</s>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<unk>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "58929": {
20
+ "content": "<pad>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "clean_up_tokenization_spaces": true,
29
+ "eos_token": "</s>",
30
+ "model_max_length": 512,
31
+ "pad_token": "<pad>",
32
+ "separate_vocabs": false,
33
+ "source_lang": "da",
34
+ "sp_model_kwargs": {},
35
+ "target_lang": "en",
36
+ "tokenizer_class": "MarianTokenizer",
37
+ "unk_token": "<unk>"
38
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2938b4ba280bc0635867dead56a341625ab639267b9813724099ada202129ed4
3
+ size 5176
training_params.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_path": "autotrain-ve993-lub6e/autotrain-data",
3
+ "model": "Helsinki-NLP/opus-mt-da-en",
4
+ "username": "LRJ1981",
5
+ "seed": 42,
6
+ "train_split": "train",
7
+ "valid_split": "validation",
8
+ "project_name": "autotrain-ve993-lub6e",
9
+ "push_to_hub": true,
10
+ "text_column": "autotrain_text",
11
+ "target_column": "autotrain_label",
12
+ "lr": 5e-05,
13
+ "epochs": 10,
14
+ "max_seq_length": 128,
15
+ "max_target_length": 128,
16
+ "batch_size": 10,
17
+ "warmup_ratio": 0.1,
18
+ "gradient_accumulation": 1,
19
+ "optimizer": "adamw_torch",
20
+ "scheduler": "linear",
21
+ "weight_decay": 0.0,
22
+ "max_grad_norm": 1.0,
23
+ "logging_steps": -1,
24
+ "evaluation_strategy": "epoch",
25
+ "auto_find_batch_size": false,
26
+ "mixed_precision": "fp16",
27
+ "save_total_limit": 1,
28
+ "peft": false,
29
+ "quantization": "int4",
30
+ "lora_r": 16,
31
+ "lora_alpha": 32,
32
+ "lora_dropout": 0.05,
33
+ "target_modules": "all-linear",
34
+ "log": "tensorboard"
35
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff