gokuls commited on
Commit
23fcbb3
1 Parent(s): bc0d00f

End of training

Browse files
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "epoch": 17.0,
3
  "eval_loss": 0.6131083965301514,
4
  "eval_matthews_correlation": 0.0,
5
- "eval_runtime": 1.0977,
6
  "eval_samples": 1043,
7
- "eval_samples_per_second": 950.147,
8
- "eval_steps_per_second": 4.555,
9
- "train_loss": 0.11972142585833592,
10
- "train_runtime": 177.9873,
11
  "train_samples": 8551,
12
- "train_samples_per_second": 2402.138,
13
- "train_steps_per_second": 9.551
14
  }
 
1
  {
2
+ "epoch": 12.0,
3
  "eval_loss": 0.6131083965301514,
4
  "eval_matthews_correlation": 0.0,
5
+ "eval_runtime": 1.1058,
6
  "eval_samples": 1043,
7
+ "eval_samples_per_second": 943.228,
8
+ "eval_steps_per_second": 4.522,
9
+ "train_loss": 0.5614397408915501,
10
+ "train_runtime": 434.3841,
11
  "train_samples": 8551,
12
+ "train_samples_per_second": 984.267,
13
+ "train_steps_per_second": 3.914
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 17.0,
3
  "eval_loss": 0.6131083965301514,
4
  "eval_matthews_correlation": 0.0,
5
- "eval_runtime": 1.0977,
6
  "eval_samples": 1043,
7
- "eval_samples_per_second": 950.147,
8
- "eval_steps_per_second": 4.555
9
  }
 
1
  {
2
+ "epoch": 12.0,
3
  "eval_loss": 0.6131083965301514,
4
  "eval_matthews_correlation": 0.0,
5
+ "eval_runtime": 1.1058,
6
  "eval_samples": 1043,
7
+ "eval_samples_per_second": 943.228,
8
+ "eval_steps_per_second": 4.522
9
  }
logs/events.out.tfevents.1674303956.serv-3331.3429984.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ddd3c2e29fffad0c414d2b3883e736b574d6075dc66594513b49cb7233b33ca
3
+ size 375
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 17.0,
3
- "train_loss": 0.11972142585833592,
4
- "train_runtime": 177.9873,
5
  "train_samples": 8551,
6
- "train_samples_per_second": 2402.138,
7
- "train_steps_per_second": 9.551
8
  }
 
1
  {
2
+ "epoch": 12.0,
3
+ "train_loss": 0.5614397408915501,
4
+ "train_runtime": 434.3841,
5
  "train_samples": 8551,
6
+ "train_samples_per_second": 984.267,
7
+ "train_steps_per_second": 3.914
8
  }
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": 0.6131083965301514,
3
  "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_cola/checkpoint-238",
4
- "epoch": 17.0,
5
- "global_step": 578,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -17,9 +17,9 @@
17
  "epoch": 1.0,
18
  "eval_loss": 0.6238651871681213,
19
  "eval_matthews_correlation": 0.0,
20
- "eval_runtime": 1.5899,
21
- "eval_samples_per_second": 656.009,
22
- "eval_steps_per_second": 3.145,
23
  "step": 34
24
  },
25
  {
@@ -32,9 +32,9 @@
32
  "epoch": 2.0,
33
  "eval_loss": 0.6178815364837646,
34
  "eval_matthews_correlation": 0.0,
35
- "eval_runtime": 1.5812,
36
- "eval_samples_per_second": 659.606,
37
- "eval_steps_per_second": 3.162,
38
  "step": 68
39
  },
40
  {
@@ -47,9 +47,9 @@
47
  "epoch": 3.0,
48
  "eval_loss": 0.6179934144020081,
49
  "eval_matthews_correlation": 0.0,
50
- "eval_runtime": 2.4438,
51
- "eval_samples_per_second": 426.802,
52
- "eval_steps_per_second": 2.046,
53
  "step": 102
54
  },
55
  {
@@ -62,9 +62,9 @@
62
  "epoch": 4.0,
63
  "eval_loss": 0.6175711750984192,
64
  "eval_matthews_correlation": 0.0,
65
- "eval_runtime": 1.7323,
66
- "eval_samples_per_second": 602.096,
67
- "eval_steps_per_second": 2.886,
68
  "step": 136
69
  },
70
  {
@@ -77,9 +77,9 @@
77
  "epoch": 5.0,
78
  "eval_loss": 0.6172661781311035,
79
  "eval_matthews_correlation": 0.0,
80
- "eval_runtime": 1.7156,
81
- "eval_samples_per_second": 607.942,
82
- "eval_steps_per_second": 2.914,
83
  "step": 170
84
  },
85
  {
@@ -92,9 +92,9 @@
92
  "epoch": 6.0,
93
  "eval_loss": 0.6166184544563293,
94
  "eval_matthews_correlation": 0.0,
95
- "eval_runtime": 1.9408,
96
- "eval_samples_per_second": 537.405,
97
- "eval_steps_per_second": 2.576,
98
  "step": 204
99
  },
100
  {
@@ -107,9 +107,9 @@
107
  "epoch": 7.0,
108
  "eval_loss": 0.6131083965301514,
109
  "eval_matthews_correlation": 0.0,
110
- "eval_runtime": 1.7507,
111
- "eval_samples_per_second": 595.751,
112
- "eval_steps_per_second": 2.856,
113
  "step": 238
114
  },
115
  {
@@ -122,9 +122,9 @@
122
  "epoch": 8.0,
123
  "eval_loss": 0.6240708231925964,
124
  "eval_matthews_correlation": 0.0951039122870703,
125
- "eval_runtime": 1.7156,
126
- "eval_samples_per_second": 607.967,
127
- "eval_steps_per_second": 2.915,
128
  "step": 272
129
  },
130
  {
@@ -137,9 +137,9 @@
137
  "epoch": 9.0,
138
  "eval_loss": 0.6361746191978455,
139
  "eval_matthews_correlation": 0.059760920069176514,
140
- "eval_runtime": 1.9833,
141
- "eval_samples_per_second": 525.879,
142
- "eval_steps_per_second": 2.521,
143
  "step": 306
144
  },
145
  {
@@ -152,9 +152,9 @@
152
  "epoch": 10.0,
153
  "eval_loss": 0.7009902596473694,
154
  "eval_matthews_correlation": 0.08008155523655092,
155
- "eval_runtime": 1.7208,
156
- "eval_samples_per_second": 606.103,
157
- "eval_steps_per_second": 2.906,
158
  "step": 340
159
  },
160
  {
@@ -167,9 +167,9 @@
167
  "epoch": 11.0,
168
  "eval_loss": 0.6731011867523193,
169
  "eval_matthews_correlation": 0.09051190856095573,
170
- "eval_runtime": 1.7848,
171
- "eval_samples_per_second": 584.391,
172
- "eval_steps_per_second": 2.801,
173
  "step": 374
174
  },
175
  {
@@ -182,99 +182,24 @@
182
  "epoch": 12.0,
183
  "eval_loss": 0.6893478035926819,
184
  "eval_matthews_correlation": 0.09007205990892461,
185
- "eval_runtime": 1.959,
186
- "eval_samples_per_second": 532.427,
187
- "eval_steps_per_second": 2.552,
188
  "step": 408
189
  },
190
  {
191
- "epoch": 13.0,
192
- "learning_rate": 3.7e-05,
193
- "loss": 0.4319,
194
- "step": 442
195
- },
196
- {
197
- "epoch": 13.0,
198
- "eval_loss": 0.703128457069397,
199
- "eval_matthews_correlation": 0.08499911671189282,
200
- "eval_runtime": 1.0793,
201
- "eval_samples_per_second": 966.409,
202
- "eval_steps_per_second": 4.633,
203
- "step": 442
204
- },
205
- {
206
- "epoch": 14.0,
207
- "learning_rate": 3.6e-05,
208
- "loss": 0.4153,
209
- "step": 476
210
- },
211
- {
212
- "epoch": 14.0,
213
- "eval_loss": 0.718442440032959,
214
- "eval_matthews_correlation": 0.08205498032090315,
215
- "eval_runtime": 1.0885,
216
- "eval_samples_per_second": 958.214,
217
- "eval_steps_per_second": 4.594,
218
- "step": 476
219
- },
220
- {
221
- "epoch": 15.0,
222
- "learning_rate": 3.5e-05,
223
- "loss": 0.4048,
224
- "step": 510
225
- },
226
- {
227
- "epoch": 15.0,
228
- "eval_loss": 0.8069401383399963,
229
- "eval_matthews_correlation": 0.10451124263480288,
230
- "eval_runtime": 1.2661,
231
- "eval_samples_per_second": 823.769,
232
- "eval_steps_per_second": 3.949,
233
- "step": 510
234
- },
235
- {
236
- "epoch": 16.0,
237
- "learning_rate": 3.4000000000000007e-05,
238
- "loss": 0.3926,
239
- "step": 544
240
- },
241
- {
242
- "epoch": 16.0,
243
- "eval_loss": 0.7495270371437073,
244
- "eval_matthews_correlation": 0.13154834951294067,
245
- "eval_runtime": 1.1233,
246
- "eval_samples_per_second": 928.509,
247
- "eval_steps_per_second": 4.451,
248
- "step": 544
249
- },
250
- {
251
- "epoch": 17.0,
252
- "learning_rate": 3.3e-05,
253
- "loss": 0.3907,
254
- "step": 578
255
- },
256
- {
257
- "epoch": 17.0,
258
- "eval_loss": 0.8371085524559021,
259
- "eval_matthews_correlation": 0.10202308372884508,
260
- "eval_runtime": 1.1282,
261
- "eval_samples_per_second": 924.472,
262
- "eval_steps_per_second": 4.432,
263
- "step": 578
264
- },
265
- {
266
- "epoch": 17.0,
267
- "step": 578,
268
- "total_flos": 4616521615671296.0,
269
- "train_loss": 0.11972142585833592,
270
- "train_runtime": 177.9873,
271
- "train_samples_per_second": 2402.138,
272
- "train_steps_per_second": 9.551
273
  }
274
  ],
275
  "max_steps": 1700,
276
  "num_train_epochs": 50,
277
- "total_flos": 4616521615671296.0,
278
  "trial_name": null,
279
  "trial_params": null
280
  }
 
1
  {
2
  "best_metric": 0.6131083965301514,
3
  "best_model_checkpoint": "mobilebert_sa_GLUE_Experiment_cola/checkpoint-238",
4
+ "epoch": 12.0,
5
+ "global_step": 408,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
17
  "epoch": 1.0,
18
  "eval_loss": 0.6238651871681213,
19
  "eval_matthews_correlation": 0.0,
20
+ "eval_runtime": 1.0726,
21
+ "eval_samples_per_second": 972.413,
22
+ "eval_steps_per_second": 4.662,
23
  "step": 34
24
  },
25
  {
 
32
  "epoch": 2.0,
33
  "eval_loss": 0.6178815364837646,
34
  "eval_matthews_correlation": 0.0,
35
+ "eval_runtime": 1.0582,
36
+ "eval_samples_per_second": 985.593,
37
+ "eval_steps_per_second": 4.725,
38
  "step": 68
39
  },
40
  {
 
47
  "epoch": 3.0,
48
  "eval_loss": 0.6179934144020081,
49
  "eval_matthews_correlation": 0.0,
50
+ "eval_runtime": 1.3387,
51
+ "eval_samples_per_second": 779.113,
52
+ "eval_steps_per_second": 3.735,
53
  "step": 102
54
  },
55
  {
 
62
  "epoch": 4.0,
63
  "eval_loss": 0.6175711750984192,
64
  "eval_matthews_correlation": 0.0,
65
+ "eval_runtime": 1.1189,
66
+ "eval_samples_per_second": 932.188,
67
+ "eval_steps_per_second": 4.469,
68
  "step": 136
69
  },
70
  {
 
77
  "epoch": 5.0,
78
  "eval_loss": 0.6172661781311035,
79
  "eval_matthews_correlation": 0.0,
80
+ "eval_runtime": 1.1205,
81
+ "eval_samples_per_second": 930.859,
82
+ "eval_steps_per_second": 4.462,
83
  "step": 170
84
  },
85
  {
 
92
  "epoch": 6.0,
93
  "eval_loss": 0.6166184544563293,
94
  "eval_matthews_correlation": 0.0,
95
+ "eval_runtime": 1.2344,
96
+ "eval_samples_per_second": 844.955,
97
+ "eval_steps_per_second": 4.051,
98
  "step": 204
99
  },
100
  {
 
107
  "epoch": 7.0,
108
  "eval_loss": 0.6131083965301514,
109
  "eval_matthews_correlation": 0.0,
110
+ "eval_runtime": 1.1421,
111
+ "eval_samples_per_second": 913.231,
112
+ "eval_steps_per_second": 4.378,
113
  "step": 238
114
  },
115
  {
 
122
  "epoch": 8.0,
123
  "eval_loss": 0.6240708231925964,
124
  "eval_matthews_correlation": 0.0951039122870703,
125
+ "eval_runtime": 1.1486,
126
+ "eval_samples_per_second": 908.065,
127
+ "eval_steps_per_second": 4.353,
128
  "step": 272
129
  },
130
  {
 
137
  "epoch": 9.0,
138
  "eval_loss": 0.6361746191978455,
139
  "eval_matthews_correlation": 0.059760920069176514,
140
+ "eval_runtime": 1.2007,
141
+ "eval_samples_per_second": 868.661,
142
+ "eval_steps_per_second": 4.164,
143
  "step": 306
144
  },
145
  {
 
152
  "epoch": 10.0,
153
  "eval_loss": 0.7009902596473694,
154
  "eval_matthews_correlation": 0.08008155523655092,
155
+ "eval_runtime": 1.1475,
156
+ "eval_samples_per_second": 908.97,
157
+ "eval_steps_per_second": 4.357,
158
  "step": 340
159
  },
160
  {
 
167
  "epoch": 11.0,
168
  "eval_loss": 0.6731011867523193,
169
  "eval_matthews_correlation": 0.09051190856095573,
170
+ "eval_runtime": 1.1134,
171
+ "eval_samples_per_second": 936.763,
172
+ "eval_steps_per_second": 4.491,
173
  "step": 374
174
  },
175
  {
 
182
  "epoch": 12.0,
183
  "eval_loss": 0.6893478035926819,
184
  "eval_matthews_correlation": 0.09007205990892461,
185
+ "eval_runtime": 1.2098,
186
+ "eval_samples_per_second": 862.13,
187
+ "eval_steps_per_second": 4.133,
188
  "step": 408
189
  },
190
  {
191
+ "epoch": 12.0,
192
+ "step": 408,
193
+ "total_flos": 3258721140473856.0,
194
+ "train_loss": 0.5614397408915501,
195
+ "train_runtime": 434.3841,
196
+ "train_samples_per_second": 984.267,
197
+ "train_steps_per_second": 3.914
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
198
  }
199
  ],
200
  "max_steps": 1700,
201
  "num_train_epochs": 50,
202
+ "total_flos": 3258721140473856.0,
203
  "trial_name": null,
204
  "trial_params": null
205
  }