jorgeortizfuentes commited on
Commit
96c0cbf
1 Parent(s): 2ba18b8

End of training

Browse files
all_results.json ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_bleu": 0.9098,
4
+ "eval_gen_len": 235.1613,
5
+ "eval_loss": 0.163666233420372,
6
+ "eval_runtime": 7809.6178,
7
+ "eval_samples": 78039,
8
+ "eval_samples_per_second": 9.993,
9
+ "eval_steps_per_second": 0.156,
10
+ "train_loss": 0.21620578862409723,
11
+ "train_runtime": 45769.0844,
12
+ "train_samples": 624312,
13
+ "train_samples_per_second": 27.281,
14
+ "train_steps_per_second": 0.426
15
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "eval_bleu": 0.9098,
4
+ "eval_gen_len": 235.1613,
5
+ "eval_loss": 0.163666233420372,
6
+ "eval_runtime": 7809.6178,
7
+ "eval_samples": 78039,
8
+ "eval_samples_per_second": 9.993,
9
+ "eval_steps_per_second": 0.156
10
+ }
runs/Apr11_19-26-37_relela-04/events.out.tfevents.1681309449.relela-04.1511770.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf60caf8c6b398a12fb5137a76d4b29951f4ad699116a1afd60217c36c996e8b
3
+ size 417
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 2.0,
3
+ "train_loss": 0.21620578862409723,
4
+ "train_runtime": 45769.0844,
5
+ "train_samples": 624312,
6
+ "train_samples_per_second": 27.281,
7
+ "train_steps_per_second": 0.426
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
+ "global_step": 19510,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.05,
12
+ "learning_rate": 4.8718605843157357e-05,
13
+ "loss": 0.5024,
14
+ "step": 500
15
+ },
16
+ {
17
+ "epoch": 0.1,
18
+ "learning_rate": 4.743721168631472e-05,
19
+ "loss": 0.3004,
20
+ "step": 1000
21
+ },
22
+ {
23
+ "epoch": 0.15,
24
+ "learning_rate": 4.6155817529472065e-05,
25
+ "loss": 0.2771,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.21,
30
+ "learning_rate": 4.487442337262942e-05,
31
+ "loss": 0.2631,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.26,
36
+ "learning_rate": 4.359302921578678e-05,
37
+ "loss": 0.2525,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.31,
42
+ "learning_rate": 4.2311635058944134e-05,
43
+ "loss": 0.246,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.36,
48
+ "learning_rate": 4.103024090210149e-05,
49
+ "loss": 0.2379,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.41,
54
+ "learning_rate": 3.974884674525884e-05,
55
+ "loss": 0.2329,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.46,
60
+ "learning_rate": 3.8467452588416197e-05,
61
+ "loss": 0.2266,
62
+ "step": 4500
63
+ },
64
+ {
65
+ "epoch": 0.51,
66
+ "learning_rate": 3.718605843157356e-05,
67
+ "loss": 0.224,
68
+ "step": 5000
69
+ },
70
+ {
71
+ "epoch": 0.56,
72
+ "learning_rate": 3.590466427473091e-05,
73
+ "loss": 0.219,
74
+ "step": 5500
75
+ },
76
+ {
77
+ "epoch": 0.62,
78
+ "learning_rate": 3.462327011788826e-05,
79
+ "loss": 0.2148,
80
+ "step": 6000
81
+ },
82
+ {
83
+ "epoch": 0.67,
84
+ "learning_rate": 3.334187596104562e-05,
85
+ "loss": 0.2128,
86
+ "step": 6500
87
+ },
88
+ {
89
+ "epoch": 0.72,
90
+ "learning_rate": 3.2060481804202974e-05,
91
+ "loss": 0.2118,
92
+ "step": 7000
93
+ },
94
+ {
95
+ "epoch": 0.77,
96
+ "learning_rate": 3.077908764736033e-05,
97
+ "loss": 0.2088,
98
+ "step": 7500
99
+ },
100
+ {
101
+ "epoch": 0.82,
102
+ "learning_rate": 2.9497693490517686e-05,
103
+ "loss": 0.2042,
104
+ "step": 8000
105
+ },
106
+ {
107
+ "epoch": 0.87,
108
+ "learning_rate": 2.821629933367504e-05,
109
+ "loss": 0.2031,
110
+ "step": 8500
111
+ },
112
+ {
113
+ "epoch": 0.92,
114
+ "learning_rate": 2.693490517683239e-05,
115
+ "loss": 0.2031,
116
+ "step": 9000
117
+ },
118
+ {
119
+ "epoch": 0.97,
120
+ "learning_rate": 2.565351101998975e-05,
121
+ "loss": 0.1993,
122
+ "step": 9500
123
+ },
124
+ {
125
+ "epoch": 1.0,
126
+ "eval_bleu": 0.0,
127
+ "eval_gen_len": 19.0,
128
+ "eval_loss": 0.17119529843330383,
129
+ "eval_runtime": 1684.1894,
130
+ "eval_samples_per_second": 46.336,
131
+ "eval_steps_per_second": 0.724,
132
+ "step": 9755
133
+ },
134
+ {
135
+ "epoch": 1.03,
136
+ "learning_rate": 2.4372116863147106e-05,
137
+ "loss": 0.1977,
138
+ "step": 10000
139
+ },
140
+ {
141
+ "epoch": 1.08,
142
+ "learning_rate": 2.309072270630446e-05,
143
+ "loss": 0.1966,
144
+ "step": 10500
145
+ },
146
+ {
147
+ "epoch": 1.13,
148
+ "learning_rate": 2.1809328549461817e-05,
149
+ "loss": 0.1957,
150
+ "step": 11000
151
+ },
152
+ {
153
+ "epoch": 1.18,
154
+ "learning_rate": 2.052793439261917e-05,
155
+ "loss": 0.1938,
156
+ "step": 11500
157
+ },
158
+ {
159
+ "epoch": 1.23,
160
+ "learning_rate": 1.9246540235776526e-05,
161
+ "loss": 0.1927,
162
+ "step": 12000
163
+ },
164
+ {
165
+ "epoch": 1.28,
166
+ "learning_rate": 1.796514607893388e-05,
167
+ "loss": 0.1926,
168
+ "step": 12500
169
+ },
170
+ {
171
+ "epoch": 1.33,
172
+ "learning_rate": 1.6683751922091237e-05,
173
+ "loss": 0.1908,
174
+ "step": 13000
175
+ },
176
+ {
177
+ "epoch": 1.38,
178
+ "learning_rate": 1.540235776524859e-05,
179
+ "loss": 0.1901,
180
+ "step": 13500
181
+ },
182
+ {
183
+ "epoch": 1.44,
184
+ "learning_rate": 1.4120963608405946e-05,
185
+ "loss": 0.1892,
186
+ "step": 14000
187
+ },
188
+ {
189
+ "epoch": 1.49,
190
+ "learning_rate": 1.2839569451563302e-05,
191
+ "loss": 0.1886,
192
+ "step": 14500
193
+ },
194
+ {
195
+ "epoch": 1.54,
196
+ "learning_rate": 1.1558175294720656e-05,
197
+ "loss": 0.1893,
198
+ "step": 15000
199
+ },
200
+ {
201
+ "epoch": 1.59,
202
+ "learning_rate": 1.0276781137878012e-05,
203
+ "loss": 0.1872,
204
+ "step": 15500
205
+ },
206
+ {
207
+ "epoch": 1.64,
208
+ "learning_rate": 8.995386981035367e-06,
209
+ "loss": 0.1879,
210
+ "step": 16000
211
+ },
212
+ {
213
+ "epoch": 1.69,
214
+ "learning_rate": 7.713992824192722e-06,
215
+ "loss": 0.1862,
216
+ "step": 16500
217
+ },
218
+ {
219
+ "epoch": 1.74,
220
+ "learning_rate": 6.432598667350077e-06,
221
+ "loss": 0.1862,
222
+ "step": 17000
223
+ },
224
+ {
225
+ "epoch": 1.79,
226
+ "learning_rate": 5.151204510507432e-06,
227
+ "loss": 0.1861,
228
+ "step": 17500
229
+ },
230
+ {
231
+ "epoch": 1.85,
232
+ "learning_rate": 3.869810353664787e-06,
233
+ "loss": 0.185,
234
+ "step": 18000
235
+ },
236
+ {
237
+ "epoch": 1.9,
238
+ "learning_rate": 2.588416196822143e-06,
239
+ "loss": 0.1848,
240
+ "step": 18500
241
+ },
242
+ {
243
+ "epoch": 1.95,
244
+ "learning_rate": 1.3070220399794978e-06,
245
+ "loss": 0.186,
246
+ "step": 19000
247
+ },
248
+ {
249
+ "epoch": 2.0,
250
+ "learning_rate": 2.5627883136852895e-08,
251
+ "loss": 0.1863,
252
+ "step": 19500
253
+ },
254
+ {
255
+ "epoch": 2.0,
256
+ "eval_bleu": 0.0,
257
+ "eval_gen_len": 19.0,
258
+ "eval_loss": 0.163666233420372,
259
+ "eval_runtime": 1684.6721,
260
+ "eval_samples_per_second": 46.323,
261
+ "eval_steps_per_second": 0.724,
262
+ "step": 19510
263
+ },
264
+ {
265
+ "epoch": 2.0,
266
+ "step": 19510,
267
+ "total_flos": 3.801797311667896e+17,
268
+ "train_loss": 0.21620578862409723,
269
+ "train_runtime": 45769.0844,
270
+ "train_samples_per_second": 27.281,
271
+ "train_steps_per_second": 0.426
272
+ }
273
+ ],
274
+ "max_steps": 19510,
275
+ "num_train_epochs": 2,
276
+ "total_flos": 3.801797311667896e+17,
277
+ "trial_name": null,
278
+ "trial_params": null
279
+ }