ShihTing commited on
Commit
37596fe
1 Parent(s): 288e25b

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +208 -106
trainer_state.json CHANGED
@@ -1,202 +1,304 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 16.0427807486631,
5
- "global_step": 3000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.07,
12
- "eval_accuracy": 0.5401069518716578,
13
- "eval_loss": 1.1939018964767456,
14
- "eval_recall": 0.5401069518716578,
15
- "eval_runtime": 10.154,
16
- "eval_samples_per_second": 18.416,
17
- "eval_steps_per_second": 4.629,
 
 
18
  "step": 200
19
  },
20
  {
21
  "epoch": 2.14,
22
- "eval_accuracy": 0.6951871657754011,
23
- "eval_loss": 0.8892518877983093,
24
- "eval_recall": 0.6951871657754011,
25
- "eval_runtime": 10.177,
26
- "eval_samples_per_second": 18.375,
27
- "eval_steps_per_second": 4.618,
 
 
28
  "step": 400
29
  },
30
  {
31
  "epoch": 2.67,
32
- "learning_rate": 4.554367201426025e-05,
33
- "loss": 1.2205,
34
  "step": 500
35
  },
36
  {
37
  "epoch": 3.21,
38
- "eval_accuracy": 0.6149732620320856,
39
- "eval_loss": 1.2934410572052002,
40
- "eval_recall": 0.6149732620320856,
41
- "eval_runtime": 10.164,
42
- "eval_samples_per_second": 18.398,
43
- "eval_steps_per_second": 4.624,
 
 
44
  "step": 600
45
  },
46
  {
47
  "epoch": 4.28,
48
- "eval_accuracy": 0.6470588235294118,
49
- "eval_loss": 1.179315447807312,
50
- "eval_recall": 0.6470588235294118,
51
- "eval_runtime": 10.156,
52
- "eval_samples_per_second": 18.413,
53
- "eval_steps_per_second": 4.628,
 
 
54
  "step": 800
55
  },
56
  {
57
  "epoch": 5.35,
58
- "learning_rate": 4.10873440285205e-05,
59
- "loss": 0.8231,
60
  "step": 1000
61
  },
62
  {
63
  "epoch": 5.35,
64
- "eval_accuracy": 0.6310160427807486,
65
- "eval_loss": 1.7959877252578735,
66
- "eval_recall": 0.6310160427807486,
67
- "eval_runtime": 10.15,
68
- "eval_samples_per_second": 18.424,
69
- "eval_steps_per_second": 4.631,
 
 
70
  "step": 1000
71
  },
72
  {
73
  "epoch": 6.42,
74
- "eval_accuracy": 0.7005347593582888,
75
- "eval_loss": 1.6884827613830566,
76
- "eval_recall": 0.7005347593582888,
77
- "eval_runtime": 10.222,
78
- "eval_samples_per_second": 18.294,
79
- "eval_steps_per_second": 4.598,
 
 
80
  "step": 1200
81
  },
82
  {
83
  "epoch": 7.49,
84
- "eval_accuracy": 0.6203208556149733,
85
- "eval_loss": 1.8338332176208496,
86
- "eval_recall": 0.6203208556149733,
87
- "eval_runtime": 10.15,
88
- "eval_samples_per_second": 18.424,
89
- "eval_steps_per_second": 4.631,
 
 
90
  "step": 1400
91
  },
92
  {
93
  "epoch": 8.02,
94
- "learning_rate": 3.6631016042780753e-05,
95
- "loss": 0.5043,
96
  "step": 1500
97
  },
98
  {
99
  "epoch": 8.56,
100
- "eval_accuracy": 0.6524064171122995,
101
- "eval_loss": 1.8866851329803467,
102
- "eval_recall": 0.6524064171122995,
103
- "eval_runtime": 10.282,
104
- "eval_samples_per_second": 18.187,
105
- "eval_steps_per_second": 4.571,
 
 
106
  "step": 1600
107
  },
108
  {
109
  "epoch": 9.63,
110
- "eval_accuracy": 0.6951871657754011,
111
- "eval_loss": 1.6332286596298218,
112
- "eval_recall": 0.6951871657754011,
113
- "eval_runtime": 10.094,
114
- "eval_samples_per_second": 18.526,
115
- "eval_steps_per_second": 4.656,
 
 
116
  "step": 1800
117
  },
118
  {
119
  "epoch": 10.7,
120
- "learning_rate": 3.2174688057041004e-05,
121
- "loss": 0.2426,
122
  "step": 2000
123
  },
124
  {
125
  "epoch": 10.7,
126
- "eval_accuracy": 0.6631016042780749,
127
- "eval_loss": 2.0470945835113525,
128
- "eval_recall": 0.6631016042780749,
129
- "eval_runtime": 10.46,
130
- "eval_samples_per_second": 17.878,
131
- "eval_steps_per_second": 4.493,
 
 
132
  "step": 2000
133
  },
134
  {
135
  "epoch": 11.76,
136
- "eval_accuracy": 0.6684491978609626,
137
- "eval_loss": 2.127241373062134,
138
- "eval_recall": 0.6684491978609626,
139
- "eval_runtime": 10.207,
140
- "eval_samples_per_second": 18.321,
141
- "eval_steps_per_second": 4.605,
 
 
142
  "step": 2200
143
  },
144
  {
145
  "epoch": 12.83,
146
- "eval_accuracy": 0.7112299465240641,
147
- "eval_loss": 2.163196563720703,
148
- "eval_recall": 0.7112299465240641,
149
- "eval_runtime": 10.211,
150
- "eval_samples_per_second": 18.314,
151
- "eval_steps_per_second": 4.603,
 
 
152
  "step": 2400
153
  },
154
  {
155
  "epoch": 13.37,
156
- "learning_rate": 2.7718360071301247e-05,
157
- "loss": 0.1709,
158
  "step": 2500
159
  },
160
  {
161
  "epoch": 13.9,
162
- "eval_accuracy": 0.679144385026738,
163
- "eval_loss": 2.2403364181518555,
164
- "eval_recall": 0.679144385026738,
165
- "eval_runtime": 10.385,
166
- "eval_samples_per_second": 18.007,
167
- "eval_steps_per_second": 4.526,
 
 
168
  "step": 2600
169
  },
170
  {
171
  "epoch": 14.97,
172
- "eval_accuracy": 0.6844919786096256,
173
- "eval_loss": 2.3301198482513428,
174
- "eval_recall": 0.6844919786096256,
175
- "eval_runtime": 10.223,
176
- "eval_samples_per_second": 18.292,
177
- "eval_steps_per_second": 4.597,
 
 
178
  "step": 2800
179
  },
180
  {
181
  "epoch": 16.04,
182
- "learning_rate": 2.32620320855615e-05,
183
- "loss": 0.1114,
184
  "step": 3000
185
  },
186
  {
187
  "epoch": 16.04,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  "eval_accuracy": 0.6898395721925134,
189
- "eval_loss": 2.3098907470703125,
 
 
190
  "eval_recall": 0.6898395721925134,
191
- "eval_runtime": 10.233,
192
- "eval_samples_per_second": 18.274,
193
- "eval_steps_per_second": 4.593,
194
- "step": 3000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
195
  }
196
  ],
197
- "max_steps": 5610,
198
- "num_train_epochs": 30,
199
- "total_flos": 3149026201829376.0,
200
  "trial_name": null,
201
  "trial_params": null
202
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 21.390374331550802,
5
+ "global_step": 4000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 1.07,
12
+ "eval_accuracy": 0.48128342245989303,
13
+ "eval_f1": 0.48128342245989303,
14
+ "eval_loss": 1.350968837738037,
15
+ "eval_precision": 0.48128342245989303,
16
+ "eval_recall": 0.48128342245989303,
17
+ "eval_runtime": 10.797,
18
+ "eval_samples_per_second": 17.32,
19
+ "eval_steps_per_second": 4.353,
20
  "step": 200
21
  },
22
  {
23
  "epoch": 2.14,
24
+ "eval_accuracy": 0.6577540106951871,
25
+ "eval_f1": 0.6577540106951871,
26
+ "eval_loss": 1.0548419952392578,
27
+ "eval_precision": 0.6577540106951871,
28
+ "eval_recall": 0.6577540106951871,
29
+ "eval_runtime": 10.384,
30
+ "eval_samples_per_second": 18.008,
31
+ "eval_steps_per_second": 4.526,
32
  "step": 400
33
  },
34
  {
35
  "epoch": 2.67,
36
+ "learning_rate": 4.732620320855615e-05,
37
+ "loss": 1.271,
38
  "step": 500
39
  },
40
  {
41
  "epoch": 3.21,
42
+ "eval_accuracy": 0.6631016042780749,
43
+ "eval_f1": 0.6631016042780749,
44
+ "eval_loss": 1.257256031036377,
45
+ "eval_precision": 0.6631016042780749,
46
+ "eval_recall": 0.6631016042780749,
47
+ "eval_runtime": 10.09,
48
+ "eval_samples_per_second": 18.533,
49
+ "eval_steps_per_second": 4.658,
50
  "step": 600
51
  },
52
  {
53
  "epoch": 4.28,
54
+ "eval_accuracy": 0.6577540106951871,
55
+ "eval_f1": 0.6577540106951871,
56
+ "eval_loss": 1.3473643064498901,
57
+ "eval_precision": 0.6577540106951871,
58
+ "eval_recall": 0.6577540106951871,
59
+ "eval_runtime": 10.111,
60
+ "eval_samples_per_second": 18.495,
61
+ "eval_steps_per_second": 4.648,
62
  "step": 800
63
  },
64
  {
65
  "epoch": 5.35,
66
+ "learning_rate": 4.4652406417112304e-05,
67
+ "loss": 0.791,
68
  "step": 1000
69
  },
70
  {
71
  "epoch": 5.35,
72
+ "eval_accuracy": 0.6844919786096256,
73
+ "eval_f1": 0.6844919786096256,
74
+ "eval_loss": 1.6341558694839478,
75
+ "eval_precision": 0.6844919786096256,
76
+ "eval_recall": 0.6844919786096256,
77
+ "eval_runtime": 10.125,
78
+ "eval_samples_per_second": 18.469,
79
+ "eval_steps_per_second": 4.642,
80
  "step": 1000
81
  },
82
  {
83
  "epoch": 6.42,
84
+ "eval_accuracy": 0.7272727272727273,
85
+ "eval_f1": 0.7272727272727273,
86
+ "eval_loss": 1.7296054363250732,
87
+ "eval_precision": 0.7272727272727273,
88
+ "eval_recall": 0.7272727272727273,
89
+ "eval_runtime": 10.343,
90
+ "eval_samples_per_second": 18.08,
91
+ "eval_steps_per_second": 4.544,
92
  "step": 1200
93
  },
94
  {
95
  "epoch": 7.49,
96
+ "eval_accuracy": 0.7112299465240641,
97
+ "eval_f1": 0.7112299465240641,
98
+ "eval_loss": 1.687750220298767,
99
+ "eval_precision": 0.7112299465240641,
100
+ "eval_recall": 0.7112299465240641,
101
+ "eval_runtime": 10.345,
102
+ "eval_samples_per_second": 18.076,
103
+ "eval_steps_per_second": 4.543,
104
  "step": 1400
105
  },
106
  {
107
  "epoch": 8.02,
108
+ "learning_rate": 4.197860962566845e-05,
109
+ "loss": 0.5343,
110
  "step": 1500
111
  },
112
  {
113
  "epoch": 8.56,
114
+ "eval_accuracy": 0.6951871657754011,
115
+ "eval_f1": 0.6951871657754011,
116
+ "eval_loss": 1.8570679426193237,
117
+ "eval_precision": 0.6951871657754011,
118
+ "eval_recall": 0.6951871657754011,
119
+ "eval_runtime": 10.19,
120
+ "eval_samples_per_second": 18.351,
121
+ "eval_steps_per_second": 4.612,
122
  "step": 1600
123
  },
124
  {
125
  "epoch": 9.63,
126
+ "eval_accuracy": 0.7058823529411765,
127
+ "eval_f1": 0.7058823529411765,
128
+ "eval_loss": 1.9290939569473267,
129
+ "eval_precision": 0.7058823529411765,
130
+ "eval_recall": 0.7058823529411765,
131
+ "eval_runtime": 10.161,
132
+ "eval_samples_per_second": 18.404,
133
+ "eval_steps_per_second": 4.626,
134
  "step": 1800
135
  },
136
  {
137
  "epoch": 10.7,
138
+ "learning_rate": 3.93048128342246e-05,
139
+ "loss": 0.2934,
140
  "step": 2000
141
  },
142
  {
143
  "epoch": 10.7,
144
+ "eval_accuracy": 0.7005347593582888,
145
+ "eval_f1": 0.7005347593582888,
146
+ "eval_loss": 1.9618135690689087,
147
+ "eval_precision": 0.7005347593582888,
148
+ "eval_recall": 0.7005347593582888,
149
+ "eval_runtime": 10.233,
150
+ "eval_samples_per_second": 18.274,
151
+ "eval_steps_per_second": 4.593,
152
  "step": 2000
153
  },
154
  {
155
  "epoch": 11.76,
156
+ "eval_accuracy": 0.7272727272727273,
157
+ "eval_f1": 0.7272727272727273,
158
+ "eval_loss": 1.8590352535247803,
159
+ "eval_precision": 0.7272727272727273,
160
+ "eval_recall": 0.7272727272727273,
161
+ "eval_runtime": 10.155,
162
+ "eval_samples_per_second": 18.415,
163
+ "eval_steps_per_second": 4.628,
164
  "step": 2200
165
  },
166
  {
167
  "epoch": 12.83,
168
+ "eval_accuracy": 0.732620320855615,
169
+ "eval_f1": 0.7326203208556151,
170
+ "eval_loss": 2.042591094970703,
171
+ "eval_precision": 0.732620320855615,
172
+ "eval_recall": 0.732620320855615,
173
+ "eval_runtime": 10.107,
174
+ "eval_samples_per_second": 18.502,
175
+ "eval_steps_per_second": 4.65,
176
  "step": 2400
177
  },
178
  {
179
  "epoch": 13.37,
180
+ "learning_rate": 3.6631016042780753e-05,
181
+ "loss": 0.1892,
182
  "step": 2500
183
  },
184
  {
185
  "epoch": 13.9,
186
+ "eval_accuracy": 0.7058823529411765,
187
+ "eval_f1": 0.7058823529411765,
188
+ "eval_loss": 2.264678716659546,
189
+ "eval_precision": 0.7058823529411765,
190
+ "eval_recall": 0.7058823529411765,
191
+ "eval_runtime": 10.161,
192
+ "eval_samples_per_second": 18.404,
193
+ "eval_steps_per_second": 4.626,
194
  "step": 2600
195
  },
196
  {
197
  "epoch": 14.97,
198
+ "eval_accuracy": 0.7112299465240641,
199
+ "eval_f1": 0.7112299465240641,
200
+ "eval_loss": 2.111647844314575,
201
+ "eval_precision": 0.7112299465240641,
202
+ "eval_recall": 0.7112299465240641,
203
+ "eval_runtime": 10.145,
204
+ "eval_samples_per_second": 18.433,
205
+ "eval_steps_per_second": 4.633,
206
  "step": 2800
207
  },
208
  {
209
  "epoch": 16.04,
210
+ "learning_rate": 3.39572192513369e-05,
211
+ "loss": 0.129,
212
  "step": 3000
213
  },
214
  {
215
  "epoch": 16.04,
216
+ "eval_accuracy": 0.7165775401069518,
217
+ "eval_f1": 0.7165775401069518,
218
+ "eval_loss": 2.1683263778686523,
219
+ "eval_precision": 0.7165775401069518,
220
+ "eval_recall": 0.7165775401069518,
221
+ "eval_runtime": 10.287,
222
+ "eval_samples_per_second": 18.178,
223
+ "eval_steps_per_second": 4.569,
224
+ "step": 3000
225
+ },
226
+ {
227
+ "epoch": 17.11,
228
+ "eval_accuracy": 0.7165775401069518,
229
+ "eval_f1": 0.7165775401069518,
230
+ "eval_loss": 2.329145669937134,
231
+ "eval_precision": 0.7165775401069518,
232
+ "eval_recall": 0.7165775401069518,
233
+ "eval_runtime": 10.188,
234
+ "eval_samples_per_second": 18.355,
235
+ "eval_steps_per_second": 4.613,
236
+ "step": 3200
237
+ },
238
+ {
239
+ "epoch": 18.18,
240
  "eval_accuracy": 0.6898395721925134,
241
+ "eval_f1": 0.6898395721925134,
242
+ "eval_loss": 2.538438081741333,
243
+ "eval_precision": 0.6898395721925134,
244
  "eval_recall": 0.6898395721925134,
245
+ "eval_runtime": 10.202,
246
+ "eval_samples_per_second": 18.33,
247
+ "eval_steps_per_second": 4.607,
248
+ "step": 3400
249
+ },
250
+ {
251
+ "epoch": 18.72,
252
+ "learning_rate": 3.128342245989305e-05,
253
+ "loss": 0.1084,
254
+ "step": 3500
255
+ },
256
+ {
257
+ "epoch": 19.25,
258
+ "eval_accuracy": 0.6951871657754011,
259
+ "eval_f1": 0.6951871657754011,
260
+ "eval_loss": 2.630084753036499,
261
+ "eval_precision": 0.6951871657754011,
262
+ "eval_recall": 0.6951871657754011,
263
+ "eval_runtime": 10.362,
264
+ "eval_samples_per_second": 18.047,
265
+ "eval_steps_per_second": 4.536,
266
+ "step": 3600
267
+ },
268
+ {
269
+ "epoch": 20.32,
270
+ "eval_accuracy": 0.6684491978609626,
271
+ "eval_f1": 0.6684491978609626,
272
+ "eval_loss": 2.8742926120758057,
273
+ "eval_precision": 0.6684491978609626,
274
+ "eval_recall": 0.6684491978609626,
275
+ "eval_runtime": 10.323,
276
+ "eval_samples_per_second": 18.115,
277
+ "eval_steps_per_second": 4.553,
278
+ "step": 3800
279
+ },
280
+ {
281
+ "epoch": 21.39,
282
+ "learning_rate": 2.8609625668449196e-05,
283
+ "loss": 0.0935,
284
+ "step": 4000
285
+ },
286
+ {
287
+ "epoch": 21.39,
288
+ "eval_accuracy": 0.7219251336898396,
289
+ "eval_f1": 0.7219251336898396,
290
+ "eval_loss": 2.320441484451294,
291
+ "eval_precision": 0.7219251336898396,
292
+ "eval_recall": 0.7219251336898396,
293
+ "eval_runtime": 10.306,
294
+ "eval_samples_per_second": 18.145,
295
+ "eval_steps_per_second": 4.56,
296
+ "step": 4000
297
  }
298
  ],
299
+ "max_steps": 9350,
300
+ "num_train_epochs": 50,
301
+ "total_flos": 4198877016109056.0,
302
  "trial_name": null,
303
  "trial_params": null
304
  }