nid989 commited on
Commit
170359a
1 Parent(s): cc1abcb

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +288 -0
trainer_state.json ADDED
@@ -0,0 +1,288 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 2700,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.37,
12
+ "learning_rate": 4.814814814814815e-05,
13
+ "loss": 0.8478,
14
+ "step": 100
15
+ },
16
+ {
17
+ "epoch": 0.74,
18
+ "learning_rate": 4.62962962962963e-05,
19
+ "loss": 0.3793,
20
+ "step": 200
21
+ },
22
+ {
23
+ "epoch": 1.0,
24
+ "eval_bleu": 16.2484,
25
+ "eval_gen_len": 18.4167,
26
+ "eval_loss": 0.3601939380168915,
27
+ "eval_meteor": 0.4221,
28
+ "eval_runtime": 13.5749,
29
+ "eval_samples_per_second": 4.42,
30
+ "eval_steps_per_second": 2.21,
31
+ "step": 270
32
+ },
33
+ {
34
+ "epoch": 1.11,
35
+ "learning_rate": 4.4444444444444447e-05,
36
+ "loss": 0.3099,
37
+ "step": 300
38
+ },
39
+ {
40
+ "epoch": 1.48,
41
+ "learning_rate": 4.259259259259259e-05,
42
+ "loss": 0.274,
43
+ "step": 400
44
+ },
45
+ {
46
+ "epoch": 1.85,
47
+ "learning_rate": 4.074074074074074e-05,
48
+ "loss": 0.2437,
49
+ "step": 500
50
+ },
51
+ {
52
+ "epoch": 2.0,
53
+ "eval_bleu": 17.2012,
54
+ "eval_gen_len": 18.45,
55
+ "eval_loss": 0.32714077830314636,
56
+ "eval_meteor": 0.4235,
57
+ "eval_runtime": 11.4881,
58
+ "eval_samples_per_second": 5.223,
59
+ "eval_steps_per_second": 2.611,
60
+ "step": 540
61
+ },
62
+ {
63
+ "epoch": 2.22,
64
+ "learning_rate": 3.888888888888889e-05,
65
+ "loss": 0.2115,
66
+ "step": 600
67
+ },
68
+ {
69
+ "epoch": 2.59,
70
+ "learning_rate": 3.7037037037037037e-05,
71
+ "loss": 0.1758,
72
+ "step": 700
73
+ },
74
+ {
75
+ "epoch": 2.96,
76
+ "learning_rate": 3.518518518518519e-05,
77
+ "loss": 0.1911,
78
+ "step": 800
79
+ },
80
+ {
81
+ "epoch": 3.0,
82
+ "eval_bleu": 17.6526,
83
+ "eval_gen_len": 18.5,
84
+ "eval_loss": 0.3079971969127655,
85
+ "eval_meteor": 0.4244,
86
+ "eval_runtime": 11.4927,
87
+ "eval_samples_per_second": 5.221,
88
+ "eval_steps_per_second": 2.61,
89
+ "step": 810
90
+ },
91
+ {
92
+ "epoch": 3.33,
93
+ "learning_rate": 3.3333333333333335e-05,
94
+ "loss": 0.136,
95
+ "step": 900
96
+ },
97
+ {
98
+ "epoch": 3.7,
99
+ "learning_rate": 3.148148148148148e-05,
100
+ "loss": 0.1508,
101
+ "step": 1000
102
+ },
103
+ {
104
+ "epoch": 4.0,
105
+ "eval_bleu": 17.6733,
106
+ "eval_gen_len": 18.5,
107
+ "eval_loss": 0.30199024081230164,
108
+ "eval_meteor": 0.4227,
109
+ "eval_runtime": 11.3901,
110
+ "eval_samples_per_second": 5.268,
111
+ "eval_steps_per_second": 2.634,
112
+ "step": 1080
113
+ },
114
+ {
115
+ "epoch": 4.07,
116
+ "learning_rate": 2.962962962962963e-05,
117
+ "loss": 0.1336,
118
+ "step": 1100
119
+ },
120
+ {
121
+ "epoch": 4.44,
122
+ "learning_rate": 2.777777777777778e-05,
123
+ "loss": 0.0853,
124
+ "step": 1200
125
+ },
126
+ {
127
+ "epoch": 4.81,
128
+ "learning_rate": 2.5925925925925925e-05,
129
+ "loss": 0.1042,
130
+ "step": 1300
131
+ },
132
+ {
133
+ "epoch": 5.0,
134
+ "eval_bleu": 17.1567,
135
+ "eval_gen_len": 18.45,
136
+ "eval_loss": 0.43005862832069397,
137
+ "eval_meteor": 0.421,
138
+ "eval_runtime": 11.4383,
139
+ "eval_samples_per_second": 5.246,
140
+ "eval_steps_per_second": 2.623,
141
+ "step": 1350
142
+ },
143
+ {
144
+ "epoch": 5.19,
145
+ "learning_rate": 2.4074074074074074e-05,
146
+ "loss": 0.0795,
147
+ "step": 1400
148
+ },
149
+ {
150
+ "epoch": 5.56,
151
+ "learning_rate": 2.2222222222222223e-05,
152
+ "loss": 0.0776,
153
+ "step": 1500
154
+ },
155
+ {
156
+ "epoch": 5.93,
157
+ "learning_rate": 2.037037037037037e-05,
158
+ "loss": 0.0712,
159
+ "step": 1600
160
+ },
161
+ {
162
+ "epoch": 6.0,
163
+ "eval_bleu": 17.5338,
164
+ "eval_gen_len": 18.5,
165
+ "eval_loss": 0.39506202936172485,
166
+ "eval_meteor": 0.4211,
167
+ "eval_runtime": 11.4467,
168
+ "eval_samples_per_second": 5.242,
169
+ "eval_steps_per_second": 2.621,
170
+ "step": 1620
171
+ },
172
+ {
173
+ "epoch": 6.3,
174
+ "learning_rate": 1.8518518518518518e-05,
175
+ "loss": 0.0548,
176
+ "step": 1700
177
+ },
178
+ {
179
+ "epoch": 6.67,
180
+ "learning_rate": 1.6666666666666667e-05,
181
+ "loss": 0.0516,
182
+ "step": 1800
183
+ },
184
+ {
185
+ "epoch": 7.0,
186
+ "eval_bleu": 17.1366,
187
+ "eval_gen_len": 18.4667,
188
+ "eval_loss": 0.44624677300453186,
189
+ "eval_meteor": 0.4185,
190
+ "eval_runtime": 11.4875,
191
+ "eval_samples_per_second": 5.223,
192
+ "eval_steps_per_second": 2.612,
193
+ "step": 1890
194
+ },
195
+ {
196
+ "epoch": 7.04,
197
+ "learning_rate": 1.4814814814814815e-05,
198
+ "loss": 0.0557,
199
+ "step": 1900
200
+ },
201
+ {
202
+ "epoch": 7.41,
203
+ "learning_rate": 1.2962962962962962e-05,
204
+ "loss": 0.0485,
205
+ "step": 2000
206
+ },
207
+ {
208
+ "epoch": 7.78,
209
+ "learning_rate": 1.1111111111111112e-05,
210
+ "loss": 0.043,
211
+ "step": 2100
212
+ },
213
+ {
214
+ "epoch": 8.0,
215
+ "eval_bleu": 17.6692,
216
+ "eval_gen_len": 18.5,
217
+ "eval_loss": 0.4671081006526947,
218
+ "eval_meteor": 0.4217,
219
+ "eval_runtime": 11.4117,
220
+ "eval_samples_per_second": 5.258,
221
+ "eval_steps_per_second": 2.629,
222
+ "step": 2160
223
+ },
224
+ {
225
+ "epoch": 8.15,
226
+ "learning_rate": 9.259259259259259e-06,
227
+ "loss": 0.0412,
228
+ "step": 2200
229
+ },
230
+ {
231
+ "epoch": 8.52,
232
+ "learning_rate": 7.4074074074074075e-06,
233
+ "loss": 0.0402,
234
+ "step": 2300
235
+ },
236
+ {
237
+ "epoch": 8.89,
238
+ "learning_rate": 5.555555555555556e-06,
239
+ "loss": 0.0243,
240
+ "step": 2400
241
+ },
242
+ {
243
+ "epoch": 9.0,
244
+ "eval_bleu": 17.5591,
245
+ "eval_gen_len": 18.5,
246
+ "eval_loss": 0.5049145221710205,
247
+ "eval_meteor": 0.4214,
248
+ "eval_runtime": 11.458,
249
+ "eval_samples_per_second": 5.237,
250
+ "eval_steps_per_second": 2.618,
251
+ "step": 2430
252
+ },
253
+ {
254
+ "epoch": 9.26,
255
+ "learning_rate": 3.7037037037037037e-06,
256
+ "loss": 0.0475,
257
+ "step": 2500
258
+ },
259
+ {
260
+ "epoch": 9.63,
261
+ "learning_rate": 1.8518518518518519e-06,
262
+ "loss": 0.0324,
263
+ "step": 2600
264
+ },
265
+ {
266
+ "epoch": 10.0,
267
+ "learning_rate": 0.0,
268
+ "loss": 0.0313,
269
+ "step": 2700
270
+ },
271
+ {
272
+ "epoch": 10.0,
273
+ "eval_bleu": 17.4599,
274
+ "eval_gen_len": 18.5,
275
+ "eval_loss": 0.5202592015266418,
276
+ "eval_meteor": 0.4218,
277
+ "eval_runtime": 11.5675,
278
+ "eval_samples_per_second": 5.187,
279
+ "eval_steps_per_second": 2.593,
280
+ "step": 2700
281
+ }
282
+ ],
283
+ "max_steps": 2700,
284
+ "num_train_epochs": 10,
285
+ "total_flos": 163370071388160.0,
286
+ "trial_name": null,
287
+ "trial_params": null
288
+ }