gonzpen commited on
Commit
a80ab73
1 Parent(s): 8279150

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +289 -0
trainer_state.json ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.95920731151508,
3
+ "best_model_checkpoint": "/scicore/home/lauerg/cerque0000/models/nlp-job-ads/gbert-large-ft-edu-redux/checkpoint-1400",
4
+ "epoch": 9.0,
5
+ "global_step": 2664,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.34,
12
+ "eval_loss": 0.29267561435699463,
13
+ "eval_lrap": 0.7186470358938737,
14
+ "eval_runtime": 22.3251,
15
+ "eval_samples_per_second": 30.101,
16
+ "eval_steps_per_second": 3.763,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.68,
21
+ "eval_loss": 0.19173942506313324,
22
+ "eval_lrap": 0.8874944283175044,
23
+ "eval_runtime": 22.732,
24
+ "eval_samples_per_second": 29.562,
25
+ "eval_steps_per_second": 3.695,
26
+ "step": 200
27
+ },
28
+ {
29
+ "epoch": 1.01,
30
+ "eval_loss": 0.1740952730178833,
31
+ "eval_lrap": 0.9086508545541496,
32
+ "eval_runtime": 22.6942,
33
+ "eval_samples_per_second": 29.611,
34
+ "eval_steps_per_second": 3.701,
35
+ "step": 300
36
+ },
37
+ {
38
+ "epoch": 1.35,
39
+ "eval_loss": 0.19585496187210083,
40
+ "eval_lrap": 0.9103675474295027,
41
+ "eval_runtime": 23.3607,
42
+ "eval_samples_per_second": 28.766,
43
+ "eval_steps_per_second": 3.596,
44
+ "step": 400
45
+ },
46
+ {
47
+ "epoch": 1.69,
48
+ "learning_rate": 2.193243243243243e-05,
49
+ "loss": 0.2242,
50
+ "step": 500
51
+ },
52
+ {
53
+ "epoch": 1.69,
54
+ "eval_loss": 0.1878644824028015,
55
+ "eval_lrap": 0.9258567343567102,
56
+ "eval_runtime": 22.7639,
57
+ "eval_samples_per_second": 29.52,
58
+ "eval_steps_per_second": 3.69,
59
+ "step": 500
60
+ },
61
+ {
62
+ "epoch": 2.03,
63
+ "eval_loss": 0.16258566081523895,
64
+ "eval_lrap": 0.9308544571355302,
65
+ "eval_runtime": 23.3248,
66
+ "eval_samples_per_second": 28.81,
67
+ "eval_steps_per_second": 3.601,
68
+ "step": 600
69
+ },
70
+ {
71
+ "epoch": 2.36,
72
+ "eval_loss": 0.15402589738368988,
73
+ "eval_lrap": 0.9306379874972384,
74
+ "eval_runtime": 22.759,
75
+ "eval_samples_per_second": 29.527,
76
+ "eval_steps_per_second": 3.691,
77
+ "step": 700
78
+ },
79
+ {
80
+ "epoch": 2.7,
81
+ "eval_loss": 0.1420990228652954,
82
+ "eval_lrap": 0.9364025033506702,
83
+ "eval_runtime": 23.3898,
84
+ "eval_samples_per_second": 28.73,
85
+ "eval_steps_per_second": 3.591,
86
+ "step": 800
87
+ },
88
+ {
89
+ "epoch": 3.04,
90
+ "eval_loss": 0.14033490419387817,
91
+ "eval_lrap": 0.9166296066698628,
92
+ "eval_runtime": 22.7034,
93
+ "eval_samples_per_second": 29.599,
94
+ "eval_steps_per_second": 3.7,
95
+ "step": 900
96
+ },
97
+ {
98
+ "epoch": 3.38,
99
+ "learning_rate": 1.6864864864864864e-05,
100
+ "loss": 0.1316,
101
+ "step": 1000
102
+ },
103
+ {
104
+ "epoch": 3.38,
105
+ "eval_loss": 0.13963457942008972,
106
+ "eval_lrap": 0.9372152596624732,
107
+ "eval_runtime": 23.3136,
108
+ "eval_samples_per_second": 28.824,
109
+ "eval_steps_per_second": 3.603,
110
+ "step": 1000
111
+ },
112
+ {
113
+ "epoch": 3.72,
114
+ "eval_loss": 0.1491432934999466,
115
+ "eval_lrap": 0.9427461184278115,
116
+ "eval_runtime": 22.6955,
117
+ "eval_samples_per_second": 29.609,
118
+ "eval_steps_per_second": 3.701,
119
+ "step": 1100
120
+ },
121
+ {
122
+ "epoch": 4.05,
123
+ "eval_loss": 0.14092977344989777,
124
+ "eval_lrap": 0.9405212229801448,
125
+ "eval_runtime": 22.7532,
126
+ "eval_samples_per_second": 29.534,
127
+ "eval_steps_per_second": 3.692,
128
+ "step": 1200
129
+ },
130
+ {
131
+ "epoch": 4.39,
132
+ "eval_loss": 0.13760367035865784,
133
+ "eval_lrap": 0.930910014527521,
134
+ "eval_runtime": 22.795,
135
+ "eval_samples_per_second": 29.48,
136
+ "eval_steps_per_second": 3.685,
137
+ "step": 1300
138
+ },
139
+ {
140
+ "epoch": 4.73,
141
+ "eval_loss": 0.13160455226898193,
142
+ "eval_lrap": 0.95920731151508,
143
+ "eval_runtime": 22.6631,
144
+ "eval_samples_per_second": 29.652,
145
+ "eval_steps_per_second": 3.706,
146
+ "step": 1400
147
+ },
148
+ {
149
+ "epoch": 5.07,
150
+ "learning_rate": 1.1797297297297297e-05,
151
+ "loss": 0.0757,
152
+ "step": 1500
153
+ },
154
+ {
155
+ "epoch": 5.07,
156
+ "eval_loss": 0.12442280352115631,
157
+ "eval_lrap": 0.9517553489380385,
158
+ "eval_runtime": 23.4283,
159
+ "eval_samples_per_second": 28.683,
160
+ "eval_steps_per_second": 3.585,
161
+ "step": 1500
162
+ },
163
+ {
164
+ "epoch": 5.41,
165
+ "eval_loss": 0.12935081124305725,
166
+ "eval_lrap": 0.9447551279048475,
167
+ "eval_runtime": 23.3876,
168
+ "eval_samples_per_second": 28.733,
169
+ "eval_steps_per_second": 3.592,
170
+ "step": 1600
171
+ },
172
+ {
173
+ "epoch": 5.74,
174
+ "eval_loss": 0.13968029618263245,
175
+ "eval_lrap": 0.9520049098544898,
176
+ "eval_runtime": 23.3148,
177
+ "eval_samples_per_second": 28.823,
178
+ "eval_steps_per_second": 3.603,
179
+ "step": 1700
180
+ },
181
+ {
182
+ "epoch": 6.08,
183
+ "eval_loss": 0.1321108192205429,
184
+ "eval_lrap": 0.9493008222774945,
185
+ "eval_runtime": 22.7141,
186
+ "eval_samples_per_second": 29.585,
187
+ "eval_steps_per_second": 3.698,
188
+ "step": 1800
189
+ },
190
+ {
191
+ "epoch": 6.42,
192
+ "eval_loss": 0.1369735449552536,
193
+ "eval_lrap": 0.9438046186531343,
194
+ "eval_runtime": 22.7987,
195
+ "eval_samples_per_second": 29.475,
196
+ "eval_steps_per_second": 3.684,
197
+ "step": 1900
198
+ },
199
+ {
200
+ "epoch": 6.76,
201
+ "learning_rate": 6.729729729729729e-06,
202
+ "loss": 0.04,
203
+ "step": 2000
204
+ },
205
+ {
206
+ "epoch": 6.76,
207
+ "eval_loss": 0.13185060024261475,
208
+ "eval_lrap": 0.9582819439486873,
209
+ "eval_runtime": 22.7045,
210
+ "eval_samples_per_second": 29.598,
211
+ "eval_steps_per_second": 3.7,
212
+ "step": 2000
213
+ },
214
+ {
215
+ "epoch": 7.09,
216
+ "eval_loss": 0.14138825237751007,
217
+ "eval_lrap": 0.9504156521289109,
218
+ "eval_runtime": 23.4267,
219
+ "eval_samples_per_second": 28.685,
220
+ "eval_steps_per_second": 3.586,
221
+ "step": 2100
222
+ },
223
+ {
224
+ "epoch": 7.43,
225
+ "eval_loss": 0.14279799163341522,
226
+ "eval_lrap": 0.9578134572863043,
227
+ "eval_runtime": 22.7296,
228
+ "eval_samples_per_second": 29.565,
229
+ "eval_steps_per_second": 3.696,
230
+ "step": 2200
231
+ },
232
+ {
233
+ "epoch": 7.77,
234
+ "eval_loss": 0.14730122685432434,
235
+ "eval_lrap": 0.9416550938849539,
236
+ "eval_runtime": 22.7712,
237
+ "eval_samples_per_second": 29.511,
238
+ "eval_steps_per_second": 3.689,
239
+ "step": 2300
240
+ },
241
+ {
242
+ "epoch": 8.11,
243
+ "eval_loss": 0.14928565919399261,
244
+ "eval_lrap": 0.9514275899253632,
245
+ "eval_runtime": 23.4136,
246
+ "eval_samples_per_second": 28.701,
247
+ "eval_steps_per_second": 3.588,
248
+ "step": 2400
249
+ },
250
+ {
251
+ "epoch": 8.45,
252
+ "learning_rate": 1.662162162162162e-06,
253
+ "loss": 0.0204,
254
+ "step": 2500
255
+ },
256
+ {
257
+ "epoch": 8.45,
258
+ "eval_loss": 0.14889824390411377,
259
+ "eval_lrap": 0.955463596740468,
260
+ "eval_runtime": 23.4183,
261
+ "eval_samples_per_second": 28.695,
262
+ "eval_steps_per_second": 3.587,
263
+ "step": 2500
264
+ },
265
+ {
266
+ "epoch": 8.78,
267
+ "eval_loss": 0.14893263578414917,
268
+ "eval_lrap": 0.9515173886897298,
269
+ "eval_runtime": 23.311,
270
+ "eval_samples_per_second": 28.828,
271
+ "eval_steps_per_second": 3.603,
272
+ "step": 2600
273
+ },
274
+ {
275
+ "epoch": 9.0,
276
+ "step": 2664,
277
+ "total_flos": 1.9853269227408384e+16,
278
+ "train_loss": 0.09327275080007834,
279
+ "train_runtime": 2835.2925,
280
+ "train_samples_per_second": 7.514,
281
+ "train_steps_per_second": 0.94
282
+ }
283
+ ],
284
+ "max_steps": 2664,
285
+ "num_train_epochs": 9,
286
+ "total_flos": 1.9853269227408384e+16,
287
+ "trial_name": null,
288
+ "trial_params": null
289
+ }