diogopaes10 commited on
Commit
dad6435
β€’
1 Parent(s): 7785de4

End of training

Browse files
checkpoint-25/trainer_state.json DELETED
@@ -1,48 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
- "global_step": 25,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.04,
12
- "learning_rate": 1.9946666666666667e-05,
13
- "loss": 2.3122,
14
- "step": 1
15
- },
16
- {
17
- "epoch": 0.76,
18
- "learning_rate": 1.898666666666667e-05,
19
- "loss": 2.3022,
20
- "step": 19
21
- },
22
- {
23
- "epoch": 0.76,
24
- "eval_accuracy": 0.1,
25
- "eval_disk_space_total": 78.1898422241211,
26
- "eval_disk_space_used": 33.75697326660156,
27
- "eval_f1": 0.018181818181818184,
28
- "eval_gpu_ram_allocated": 4.172749996185303,
29
- "eval_gpu_ram_cached": 26.759765625,
30
- "eval_gpu_ram_total": 39.56402587890625,
31
- "eval_gpu_utilization": 45,
32
- "eval_loss": 2.3011605739593506,
33
- "eval_precision": 0.01,
34
- "eval_recall": 0.1,
35
- "eval_runtime": 0.3917,
36
- "eval_samples_per_second": 510.562,
37
- "eval_steps_per_second": 17.87,
38
- "eval_system_ram_total": 83.48074722290039,
39
- "eval_system_ram_used": 4.445606231689453,
40
- "step": 19
41
- }
42
- ],
43
- "max_steps": 375,
44
- "num_train_epochs": 15,
45
- "total_flos": 54485323206528.0,
46
- "trial_name": null,
47
- "trial_params": null
48
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{checkpoint-25 β†’ checkpoint-350}/added_tokens.json RENAMED
File without changes
{checkpoint-25 β†’ checkpoint-350}/config.json RENAMED
File without changes
{checkpoint-25 β†’ checkpoint-350}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e921cbba08c6fecf2f697336c3be2bfe10852cddfbe550a52d6aeff7c8e04c99
3
- size 1475556869
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ab43cd5a44b21b5a22eef77417e02f84ff4e98fc1b7eabeba4e3e8e7e45f0ff
3
+ size 1475557125
{checkpoint-25 β†’ checkpoint-350}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a37c7031758a4eed7cf4c6318896b5e93e57169ad17514c2908814b2511d8869
3
  size 737788917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:328c4be909b1eef5cc41b750f680802efe8089d37bd9dcf8733d88094f13ce64
3
  size 737788917
{checkpoint-25 β†’ checkpoint-350}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3acef221ac140539d08c3e34b90e2817f8ef37ce8780d4ba3a802620d305bc64
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9aced460dfb01f09e32fb5f5dbf1fad9b487697a848ea1f448da69957c04042
3
  size 14575
{checkpoint-25 β†’ checkpoint-350}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:345285befea1fa1e6cf13ce42d848a29cbf62a253c2e89639e04c982764c7503
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dbb89204c9563d41cdc3199eb481d6f6f625cb563a54343cca262d582298e755
3
  size 627
{checkpoint-25 β†’ checkpoint-350}/special_tokens_map.json RENAMED
File without changes
{checkpoint-25 β†’ checkpoint-350}/spm.model RENAMED
File without changes
{checkpoint-25 β†’ checkpoint-350}/tokenizer.json RENAMED
File without changes
{checkpoint-25 β†’ checkpoint-350}/tokenizer_config.json RENAMED
File without changes
checkpoint-350/trainer_state.json ADDED
@@ -0,0 +1,490 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 14.0,
5
+ "global_step": 350,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04,
12
+ "learning_rate": 1.9946666666666667e-05,
13
+ "loss": 2.3122,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.76,
18
+ "learning_rate": 1.898666666666667e-05,
19
+ "loss": 2.3022,
20
+ "step": 19
21
+ },
22
+ {
23
+ "epoch": 0.76,
24
+ "eval_accuracy": 0.1,
25
+ "eval_disk_space_total": 78.1898422241211,
26
+ "eval_disk_space_used": 33.75697326660156,
27
+ "eval_f1": 0.018181818181818184,
28
+ "eval_gpu_ram_allocated": 4.172749996185303,
29
+ "eval_gpu_ram_cached": 26.759765625,
30
+ "eval_gpu_ram_total": 39.56402587890625,
31
+ "eval_gpu_utilization": 45,
32
+ "eval_loss": 2.3011605739593506,
33
+ "eval_precision": 0.01,
34
+ "eval_recall": 0.1,
35
+ "eval_runtime": 0.3917,
36
+ "eval_samples_per_second": 510.562,
37
+ "eval_steps_per_second": 17.87,
38
+ "eval_system_ram_total": 83.48074722290039,
39
+ "eval_system_ram_used": 4.445606231689453,
40
+ "step": 19
41
+ },
42
+ {
43
+ "epoch": 1.52,
44
+ "learning_rate": 1.7973333333333333e-05,
45
+ "loss": 2.2979,
46
+ "step": 38
47
+ },
48
+ {
49
+ "epoch": 1.52,
50
+ "eval_accuracy": 0.155,
51
+ "eval_disk_space_total": 78.1898422241211,
52
+ "eval_disk_space_used": 38.592166900634766,
53
+ "eval_f1": 0.06350931677018633,
54
+ "eval_gpu_ram_allocated": 4.172748565673828,
55
+ "eval_gpu_ram_cached": 26.771484375,
56
+ "eval_gpu_ram_total": 39.56402587890625,
57
+ "eval_gpu_utilization": 43,
58
+ "eval_loss": 2.2853753566741943,
59
+ "eval_precision": 0.04491869918699187,
60
+ "eval_recall": 0.155,
61
+ "eval_runtime": 0.3679,
62
+ "eval_samples_per_second": 543.585,
63
+ "eval_steps_per_second": 19.025,
64
+ "eval_system_ram_total": 83.48074722290039,
65
+ "eval_system_ram_used": 5.034675598144531,
66
+ "step": 38
67
+ },
68
+ {
69
+ "epoch": 2.28,
70
+ "learning_rate": 1.696e-05,
71
+ "loss": 2.2316,
72
+ "step": 57
73
+ },
74
+ {
75
+ "epoch": 2.28,
76
+ "eval_accuracy": 0.305,
77
+ "eval_disk_space_total": 78.1898422241211,
78
+ "eval_disk_space_used": 40.66392517089844,
79
+ "eval_f1": 0.2284533029124758,
80
+ "eval_gpu_ram_allocated": 4.172748565673828,
81
+ "eval_gpu_ram_cached": 26.771484375,
82
+ "eval_gpu_ram_total": 39.56402587890625,
83
+ "eval_gpu_utilization": 44,
84
+ "eval_loss": 2.1098108291625977,
85
+ "eval_precision": 0.2806426799007444,
86
+ "eval_recall": 0.305,
87
+ "eval_runtime": 0.3885,
88
+ "eval_samples_per_second": 514.84,
89
+ "eval_steps_per_second": 18.019,
90
+ "eval_system_ram_total": 83.48074722290039,
91
+ "eval_system_ram_used": 5.178070068359375,
92
+ "step": 57
93
+ },
94
+ {
95
+ "epoch": 3.04,
96
+ "learning_rate": 1.5946666666666668e-05,
97
+ "loss": 1.9915,
98
+ "step": 76
99
+ },
100
+ {
101
+ "epoch": 3.04,
102
+ "eval_accuracy": 0.43,
103
+ "eval_disk_space_total": 78.1898422241211,
104
+ "eval_disk_space_used": 40.663944244384766,
105
+ "eval_f1": 0.4147692106558386,
106
+ "eval_gpu_ram_allocated": 4.172744274139404,
107
+ "eval_gpu_ram_cached": 26.771484375,
108
+ "eval_gpu_ram_total": 39.56402587890625,
109
+ "eval_gpu_utilization": 50,
110
+ "eval_loss": 1.8477184772491455,
111
+ "eval_precision": 0.5039625709645932,
112
+ "eval_recall": 0.43,
113
+ "eval_runtime": 0.3674,
114
+ "eval_samples_per_second": 544.402,
115
+ "eval_steps_per_second": 19.054,
116
+ "eval_system_ram_total": 83.48074722290039,
117
+ "eval_system_ram_used": 5.174091339111328,
118
+ "step": 76
119
+ },
120
+ {
121
+ "epoch": 3.8,
122
+ "learning_rate": 1.4933333333333335e-05,
123
+ "loss": 1.684,
124
+ "step": 95
125
+ },
126
+ {
127
+ "epoch": 3.8,
128
+ "eval_accuracy": 0.55,
129
+ "eval_disk_space_total": 78.1898422241211,
130
+ "eval_disk_space_used": 40.663944244384766,
131
+ "eval_f1": 0.5271704828853498,
132
+ "eval_gpu_ram_allocated": 4.172764301300049,
133
+ "eval_gpu_ram_cached": 26.771484375,
134
+ "eval_gpu_ram_total": 39.56402587890625,
135
+ "eval_gpu_utilization": 47,
136
+ "eval_loss": 1.6027369499206543,
137
+ "eval_precision": 0.5665937491057825,
138
+ "eval_recall": 0.55,
139
+ "eval_runtime": 0.3642,
140
+ "eval_samples_per_second": 549.083,
141
+ "eval_steps_per_second": 19.218,
142
+ "eval_system_ram_total": 83.48074722290039,
143
+ "eval_system_ram_used": 5.1766204833984375,
144
+ "step": 95
145
+ },
146
+ {
147
+ "epoch": 4.56,
148
+ "learning_rate": 1.392e-05,
149
+ "loss": 1.3911,
150
+ "step": 114
151
+ },
152
+ {
153
+ "epoch": 4.56,
154
+ "eval_accuracy": 0.615,
155
+ "eval_disk_space_total": 78.1898422241211,
156
+ "eval_disk_space_used": 40.66395950317383,
157
+ "eval_f1": 0.6060242411805659,
158
+ "eval_gpu_ram_allocated": 4.172757148742676,
159
+ "eval_gpu_ram_cached": 26.771484375,
160
+ "eval_gpu_ram_total": 39.56402587890625,
161
+ "eval_gpu_utilization": 49,
162
+ "eval_loss": 1.4364641904830933,
163
+ "eval_precision": 0.619891761052247,
164
+ "eval_recall": 0.615,
165
+ "eval_runtime": 0.3684,
166
+ "eval_samples_per_second": 542.848,
167
+ "eval_steps_per_second": 19.0,
168
+ "eval_system_ram_total": 83.48074722290039,
169
+ "eval_system_ram_used": 5.174568176269531,
170
+ "step": 114
171
+ },
172
+ {
173
+ "epoch": 5.32,
174
+ "learning_rate": 1.2906666666666668e-05,
175
+ "loss": 1.1477,
176
+ "step": 133
177
+ },
178
+ {
179
+ "epoch": 5.32,
180
+ "eval_accuracy": 0.615,
181
+ "eval_disk_space_total": 78.1898422241211,
182
+ "eval_disk_space_used": 40.663963317871094,
183
+ "eval_f1": 0.6215457086252978,
184
+ "eval_gpu_ram_allocated": 4.172830104827881,
185
+ "eval_gpu_ram_cached": 26.771484375,
186
+ "eval_gpu_ram_total": 39.56402587890625,
187
+ "eval_gpu_utilization": 52,
188
+ "eval_loss": 1.2565349340438843,
189
+ "eval_precision": 0.6418622536733991,
190
+ "eval_recall": 0.615,
191
+ "eval_runtime": 0.3792,
192
+ "eval_samples_per_second": 527.473,
193
+ "eval_steps_per_second": 18.462,
194
+ "eval_system_ram_total": 83.48074722290039,
195
+ "eval_system_ram_used": 5.1585845947265625,
196
+ "step": 133
197
+ },
198
+ {
199
+ "epoch": 6.08,
200
+ "learning_rate": 1.1893333333333335e-05,
201
+ "loss": 0.9198,
202
+ "step": 152
203
+ },
204
+ {
205
+ "epoch": 6.08,
206
+ "eval_accuracy": 0.64,
207
+ "eval_disk_space_total": 78.1898422241211,
208
+ "eval_disk_space_used": 40.66396713256836,
209
+ "eval_f1": 0.6399837944918036,
210
+ "eval_gpu_ram_allocated": 4.172749996185303,
211
+ "eval_gpu_ram_cached": 26.771484375,
212
+ "eval_gpu_ram_total": 39.56402587890625,
213
+ "eval_gpu_utilization": 55,
214
+ "eval_loss": 1.175949215888977,
215
+ "eval_precision": 0.6532184905737537,
216
+ "eval_recall": 0.64,
217
+ "eval_runtime": 0.3814,
218
+ "eval_samples_per_second": 524.426,
219
+ "eval_steps_per_second": 18.355,
220
+ "eval_system_ram_total": 83.48074722290039,
221
+ "eval_system_ram_used": 5.180980682373047,
222
+ "step": 152
223
+ },
224
+ {
225
+ "epoch": 6.84,
226
+ "learning_rate": 1.0880000000000001e-05,
227
+ "loss": 0.7605,
228
+ "step": 171
229
+ },
230
+ {
231
+ "epoch": 6.84,
232
+ "eval_accuracy": 0.645,
233
+ "eval_disk_space_total": 78.1898422241211,
234
+ "eval_disk_space_used": 40.66401672363281,
235
+ "eval_f1": 0.6418225015596317,
236
+ "eval_gpu_ram_allocated": 4.1727471351623535,
237
+ "eval_gpu_ram_cached": 26.771484375,
238
+ "eval_gpu_ram_total": 39.56402587890625,
239
+ "eval_gpu_utilization": 45,
240
+ "eval_loss": 1.112830638885498,
241
+ "eval_precision": 0.6564146498179483,
242
+ "eval_recall": 0.645,
243
+ "eval_runtime": 0.3808,
244
+ "eval_samples_per_second": 525.141,
245
+ "eval_steps_per_second": 18.38,
246
+ "eval_system_ram_total": 83.48074722290039,
247
+ "eval_system_ram_used": 5.141529083251953,
248
+ "step": 171
249
+ },
250
+ {
251
+ "epoch": 7.6,
252
+ "learning_rate": 9.866666666666668e-06,
253
+ "loss": 0.6093,
254
+ "step": 190
255
+ },
256
+ {
257
+ "epoch": 7.6,
258
+ "eval_accuracy": 0.67,
259
+ "eval_disk_space_total": 78.1898422241211,
260
+ "eval_disk_space_used": 40.66403579711914,
261
+ "eval_f1": 0.6677676797410276,
262
+ "eval_gpu_ram_allocated": 4.172770023345947,
263
+ "eval_gpu_ram_cached": 26.771484375,
264
+ "eval_gpu_ram_total": 39.56402587890625,
265
+ "eval_gpu_utilization": 43,
266
+ "eval_loss": 1.0766719579696655,
267
+ "eval_precision": 0.6757531007004691,
268
+ "eval_recall": 0.67,
269
+ "eval_runtime": 0.383,
270
+ "eval_samples_per_second": 522.188,
271
+ "eval_steps_per_second": 18.277,
272
+ "eval_system_ram_total": 83.48074722290039,
273
+ "eval_system_ram_used": 5.134746551513672,
274
+ "step": 190
275
+ },
276
+ {
277
+ "epoch": 8.36,
278
+ "learning_rate": 8.853333333333334e-06,
279
+ "loss": 0.5111,
280
+ "step": 209
281
+ },
282
+ {
283
+ "epoch": 8.36,
284
+ "eval_accuracy": 0.655,
285
+ "eval_disk_space_total": 78.1898422241211,
286
+ "eval_disk_space_used": 40.6640510559082,
287
+ "eval_f1": 0.655230999157746,
288
+ "eval_gpu_ram_allocated": 4.1727728843688965,
289
+ "eval_gpu_ram_cached": 26.771484375,
290
+ "eval_gpu_ram_total": 39.56402587890625,
291
+ "eval_gpu_utilization": 52,
292
+ "eval_loss": 1.1033188104629517,
293
+ "eval_precision": 0.6741666516629499,
294
+ "eval_recall": 0.655,
295
+ "eval_runtime": 0.369,
296
+ "eval_samples_per_second": 541.937,
297
+ "eval_steps_per_second": 18.968,
298
+ "eval_system_ram_total": 83.48074722290039,
299
+ "eval_system_ram_used": 5.120639801025391,
300
+ "step": 209
301
+ },
302
+ {
303
+ "epoch": 9.12,
304
+ "learning_rate": 7.840000000000001e-06,
305
+ "loss": 0.3828,
306
+ "step": 228
307
+ },
308
+ {
309
+ "epoch": 9.12,
310
+ "eval_accuracy": 0.69,
311
+ "eval_disk_space_total": 78.1898422241211,
312
+ "eval_disk_space_used": 40.664058685302734,
313
+ "eval_f1": 0.6874757485635422,
314
+ "eval_gpu_ram_allocated": 4.17274284362793,
315
+ "eval_gpu_ram_cached": 26.771484375,
316
+ "eval_gpu_ram_total": 39.56402587890625,
317
+ "eval_gpu_utilization": 44,
318
+ "eval_loss": 1.1062885522842407,
319
+ "eval_precision": 0.692681623931624,
320
+ "eval_recall": 0.69,
321
+ "eval_runtime": 0.373,
322
+ "eval_samples_per_second": 536.14,
323
+ "eval_steps_per_second": 18.765,
324
+ "eval_system_ram_total": 83.48074722290039,
325
+ "eval_system_ram_used": 5.148380279541016,
326
+ "step": 228
327
+ },
328
+ {
329
+ "epoch": 9.88,
330
+ "learning_rate": 6.826666666666667e-06,
331
+ "loss": 0.3082,
332
+ "step": 247
333
+ },
334
+ {
335
+ "epoch": 9.88,
336
+ "eval_accuracy": 0.665,
337
+ "eval_disk_space_total": 78.1898422241211,
338
+ "eval_disk_space_used": 40.664058685302734,
339
+ "eval_f1": 0.657310556170902,
340
+ "eval_gpu_ram_allocated": 4.172765731811523,
341
+ "eval_gpu_ram_cached": 26.771484375,
342
+ "eval_gpu_ram_total": 39.56402587890625,
343
+ "eval_gpu_utilization": 45,
344
+ "eval_loss": 1.1239553689956665,
345
+ "eval_precision": 0.6595053580899589,
346
+ "eval_recall": 0.665,
347
+ "eval_runtime": 0.3747,
348
+ "eval_samples_per_second": 533.805,
349
+ "eval_steps_per_second": 18.683,
350
+ "eval_system_ram_total": 83.48074722290039,
351
+ "eval_system_ram_used": 5.143707275390625,
352
+ "step": 247
353
+ },
354
+ {
355
+ "epoch": 10.64,
356
+ "learning_rate": 5.813333333333334e-06,
357
+ "loss": 0.2716,
358
+ "step": 266
359
+ },
360
+ {
361
+ "epoch": 10.64,
362
+ "eval_accuracy": 0.665,
363
+ "eval_disk_space_total": 78.1898422241211,
364
+ "eval_disk_space_used": 40.66410827636719,
365
+ "eval_f1": 0.6603758535972988,
366
+ "eval_gpu_ram_allocated": 4.172775745391846,
367
+ "eval_gpu_ram_cached": 26.771484375,
368
+ "eval_gpu_ram_total": 39.56402587890625,
369
+ "eval_gpu_utilization": 45,
370
+ "eval_loss": 1.157199501991272,
371
+ "eval_precision": 0.666485053212742,
372
+ "eval_recall": 0.665,
373
+ "eval_runtime": 0.3689,
374
+ "eval_samples_per_second": 542.088,
375
+ "eval_steps_per_second": 18.973,
376
+ "eval_system_ram_total": 83.48074722290039,
377
+ "eval_system_ram_used": 5.068927764892578,
378
+ "step": 266
379
+ },
380
+ {
381
+ "epoch": 11.4,
382
+ "learning_rate": 4.800000000000001e-06,
383
+ "loss": 0.2442,
384
+ "step": 285
385
+ },
386
+ {
387
+ "epoch": 11.4,
388
+ "eval_accuracy": 0.675,
389
+ "eval_disk_space_total": 78.1898422241211,
390
+ "eval_disk_space_used": 40.66411209106445,
391
+ "eval_f1": 0.6764825046795931,
392
+ "eval_gpu_ram_allocated": 4.17277717590332,
393
+ "eval_gpu_ram_cached": 26.771484375,
394
+ "eval_gpu_ram_total": 39.56402587890625,
395
+ "eval_gpu_utilization": 42,
396
+ "eval_loss": 1.1057575941085815,
397
+ "eval_precision": 0.6826701222753855,
398
+ "eval_recall": 0.675,
399
+ "eval_runtime": 0.3756,
400
+ "eval_samples_per_second": 532.455,
401
+ "eval_steps_per_second": 18.636,
402
+ "eval_system_ram_total": 83.48074722290039,
403
+ "eval_system_ram_used": 5.031635284423828,
404
+ "step": 285
405
+ },
406
+ {
407
+ "epoch": 12.16,
408
+ "learning_rate": 3.7866666666666667e-06,
409
+ "loss": 0.1791,
410
+ "step": 304
411
+ },
412
+ {
413
+ "epoch": 12.16,
414
+ "eval_accuracy": 0.645,
415
+ "eval_disk_space_total": 78.1898422241211,
416
+ "eval_disk_space_used": 40.664119720458984,
417
+ "eval_f1": 0.6445427433817439,
418
+ "eval_gpu_ram_allocated": 4.172751426696777,
419
+ "eval_gpu_ram_cached": 26.771484375,
420
+ "eval_gpu_ram_total": 39.56402587890625,
421
+ "eval_gpu_utilization": 46,
422
+ "eval_loss": 1.1455398797988892,
423
+ "eval_precision": 0.6514674297225784,
424
+ "eval_recall": 0.645,
425
+ "eval_runtime": 0.37,
426
+ "eval_samples_per_second": 540.499,
427
+ "eval_steps_per_second": 18.917,
428
+ "eval_system_ram_total": 83.48074722290039,
429
+ "eval_system_ram_used": 5.071483612060547,
430
+ "step": 304
431
+ },
432
+ {
433
+ "epoch": 12.92,
434
+ "learning_rate": 2.7733333333333336e-06,
435
+ "loss": 0.1604,
436
+ "step": 323
437
+ },
438
+ {
439
+ "epoch": 12.92,
440
+ "eval_accuracy": 0.66,
441
+ "eval_disk_space_total": 78.1898422241211,
442
+ "eval_disk_space_used": 40.66413116455078,
443
+ "eval_f1": 0.6578390320866488,
444
+ "eval_gpu_ram_allocated": 4.172774314880371,
445
+ "eval_gpu_ram_cached": 26.771484375,
446
+ "eval_gpu_ram_total": 39.56402587890625,
447
+ "eval_gpu_utilization": 57,
448
+ "eval_loss": 1.1514325141906738,
449
+ "eval_precision": 0.6686316887895163,
450
+ "eval_recall": 0.66,
451
+ "eval_runtime": 0.368,
452
+ "eval_samples_per_second": 543.448,
453
+ "eval_steps_per_second": 19.021,
454
+ "eval_system_ram_total": 83.48074722290039,
455
+ "eval_system_ram_used": 5.0727691650390625,
456
+ "step": 323
457
+ },
458
+ {
459
+ "epoch": 13.68,
460
+ "learning_rate": 1.76e-06,
461
+ "loss": 0.1389,
462
+ "step": 342
463
+ },
464
+ {
465
+ "epoch": 13.68,
466
+ "eval_accuracy": 0.675,
467
+ "eval_disk_space_total": 78.1898422241211,
468
+ "eval_disk_space_used": 40.66417694091797,
469
+ "eval_f1": 0.6714518778998741,
470
+ "eval_gpu_ram_allocated": 4.172738552093506,
471
+ "eval_gpu_ram_cached": 26.771484375,
472
+ "eval_gpu_ram_total": 39.56402587890625,
473
+ "eval_gpu_utilization": 48,
474
+ "eval_loss": 1.160007357597351,
475
+ "eval_precision": 0.6807936997642879,
476
+ "eval_recall": 0.675,
477
+ "eval_runtime": 0.3735,
478
+ "eval_samples_per_second": 535.495,
479
+ "eval_steps_per_second": 18.742,
480
+ "eval_system_ram_total": 83.48074722290039,
481
+ "eval_system_ram_used": 5.065456390380859,
482
+ "step": 342
483
+ }
484
+ ],
485
+ "max_steps": 375,
486
+ "num_train_epochs": 15,
487
+ "total_flos": 771116158921728.0,
488
+ "trial_name": null,
489
+ "trial_params": null
490
+ }
{checkpoint-25 β†’ checkpoint-350}/training_args.bin RENAMED
File without changes
checkpoint-375/added_tokens.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "[MASK]": 128000
3
+ }
checkpoint-375/config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "microsoft/deberta-v3-base",
3
+ "architectures": [
4
+ "DebertaV2ForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "hidden_act": "gelu",
8
+ "hidden_dropout_prob": 0.1,
9
+ "hidden_size": 768,
10
+ "id2label": {
11
+ "0": "Society & Culture",
12
+ "1": "Science & Mathematics",
13
+ "2": "Health",
14
+ "3": "Education & Reference",
15
+ "4": "Computers & Internet",
16
+ "5": "Sports",
17
+ "6": "Business & Finance",
18
+ "7": "Entertainment & Music",
19
+ "8": "Family & Relationships",
20
+ "9": "Politics & Government"
21
+ },
22
+ "initializer_range": 0.02,
23
+ "intermediate_size": 3072,
24
+ "label2id": {
25
+ "Business & Finance": 6,
26
+ "Computers & Internet": 4,
27
+ "Education & Reference": 3,
28
+ "Entertainment & Music": 7,
29
+ "Family & Relationships": 8,
30
+ "Health": 2,
31
+ "Politics & Government": 9,
32
+ "Science & Mathematics": 1,
33
+ "Society & Culture": 0,
34
+ "Sports": 5
35
+ },
36
+ "layer_norm_eps": 1e-07,
37
+ "max_position_embeddings": 512,
38
+ "max_relative_positions": -1,
39
+ "model_type": "deberta-v2",
40
+ "norm_rel_ebd": "layer_norm",
41
+ "num_attention_heads": 12,
42
+ "num_hidden_layers": 12,
43
+ "pad_token_id": 0,
44
+ "pooler_dropout": 0,
45
+ "pooler_hidden_act": "gelu",
46
+ "pooler_hidden_size": 768,
47
+ "pos_att_type": [
48
+ "p2c",
49
+ "c2p"
50
+ ],
51
+ "position_biased_input": false,
52
+ "position_buckets": 256,
53
+ "relative_attention": true,
54
+ "share_att_key": true,
55
+ "torch_dtype": "float32",
56
+ "transformers_version": "4.31.0",
57
+ "type_vocab_size": 0,
58
+ "vocab_size": 128100
59
+ }
checkpoint-375/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5594f7693a00c08caf3e70e1ecaa87394cd62bfcfdd82b4b0c82ce4b244d92f1
3
+ size 1475557125
checkpoint-375/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffc4b810698b3fcb8652ef309273ad4a6eee6a8b551f0cf6d1cbcc23c8eb1803
3
+ size 737788917
checkpoint-375/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdeeb11d8cde2c5546ab64b9bc25bfce6f182490f39f172e19cfcfb132e4bd2a
3
+ size 14575
checkpoint-375/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f23724ff4d7c845b1dc0676ea3af8abdc294d9247ce526b3a4b06fbdb54c60d
3
+ size 627
checkpoint-375/special_tokens_map.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "cls_token": "[CLS]",
4
+ "eos_token": "[SEP]",
5
+ "mask_token": "[MASK]",
6
+ "pad_token": "[PAD]",
7
+ "sep_token": "[SEP]",
8
+ "unk_token": "[UNK]"
9
+ }
checkpoint-375/spm.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
+ size 2464616
checkpoint-375/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-375/tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "[CLS]",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "[CLS]",
5
+ "do_lower_case": false,
6
+ "eos_token": "[SEP]",
7
+ "mask_token": "[MASK]",
8
+ "model_max_length": 1000000000000000019884624838656,
9
+ "pad_token": "[PAD]",
10
+ "sep_token": "[SEP]",
11
+ "sp_model_kwargs": {},
12
+ "split_by_punct": false,
13
+ "tokenizer_class": "DebertaV2Tokenizer",
14
+ "unk_token": "[UNK]",
15
+ "vocab_type": "spm"
16
+ }
checkpoint-375/trainer_state.json ADDED
@@ -0,0 +1,516 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 15.0,
5
+ "global_step": 375,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 0.04,
12
+ "learning_rate": 1.9946666666666667e-05,
13
+ "loss": 2.3122,
14
+ "step": 1
15
+ },
16
+ {
17
+ "epoch": 0.76,
18
+ "learning_rate": 1.898666666666667e-05,
19
+ "loss": 2.3022,
20
+ "step": 19
21
+ },
22
+ {
23
+ "epoch": 0.76,
24
+ "eval_accuracy": 0.1,
25
+ "eval_disk_space_total": 78.1898422241211,
26
+ "eval_disk_space_used": 33.75697326660156,
27
+ "eval_f1": 0.018181818181818184,
28
+ "eval_gpu_ram_allocated": 4.172749996185303,
29
+ "eval_gpu_ram_cached": 26.759765625,
30
+ "eval_gpu_ram_total": 39.56402587890625,
31
+ "eval_gpu_utilization": 45,
32
+ "eval_loss": 2.3011605739593506,
33
+ "eval_precision": 0.01,
34
+ "eval_recall": 0.1,
35
+ "eval_runtime": 0.3917,
36
+ "eval_samples_per_second": 510.562,
37
+ "eval_steps_per_second": 17.87,
38
+ "eval_system_ram_total": 83.48074722290039,
39
+ "eval_system_ram_used": 4.445606231689453,
40
+ "step": 19
41
+ },
42
+ {
43
+ "epoch": 1.52,
44
+ "learning_rate": 1.7973333333333333e-05,
45
+ "loss": 2.2979,
46
+ "step": 38
47
+ },
48
+ {
49
+ "epoch": 1.52,
50
+ "eval_accuracy": 0.155,
51
+ "eval_disk_space_total": 78.1898422241211,
52
+ "eval_disk_space_used": 38.592166900634766,
53
+ "eval_f1": 0.06350931677018633,
54
+ "eval_gpu_ram_allocated": 4.172748565673828,
55
+ "eval_gpu_ram_cached": 26.771484375,
56
+ "eval_gpu_ram_total": 39.56402587890625,
57
+ "eval_gpu_utilization": 43,
58
+ "eval_loss": 2.2853753566741943,
59
+ "eval_precision": 0.04491869918699187,
60
+ "eval_recall": 0.155,
61
+ "eval_runtime": 0.3679,
62
+ "eval_samples_per_second": 543.585,
63
+ "eval_steps_per_second": 19.025,
64
+ "eval_system_ram_total": 83.48074722290039,
65
+ "eval_system_ram_used": 5.034675598144531,
66
+ "step": 38
67
+ },
68
+ {
69
+ "epoch": 2.28,
70
+ "learning_rate": 1.696e-05,
71
+ "loss": 2.2316,
72
+ "step": 57
73
+ },
74
+ {
75
+ "epoch": 2.28,
76
+ "eval_accuracy": 0.305,
77
+ "eval_disk_space_total": 78.1898422241211,
78
+ "eval_disk_space_used": 40.66392517089844,
79
+ "eval_f1": 0.2284533029124758,
80
+ "eval_gpu_ram_allocated": 4.172748565673828,
81
+ "eval_gpu_ram_cached": 26.771484375,
82
+ "eval_gpu_ram_total": 39.56402587890625,
83
+ "eval_gpu_utilization": 44,
84
+ "eval_loss": 2.1098108291625977,
85
+ "eval_precision": 0.2806426799007444,
86
+ "eval_recall": 0.305,
87
+ "eval_runtime": 0.3885,
88
+ "eval_samples_per_second": 514.84,
89
+ "eval_steps_per_second": 18.019,
90
+ "eval_system_ram_total": 83.48074722290039,
91
+ "eval_system_ram_used": 5.178070068359375,
92
+ "step": 57
93
+ },
94
+ {
95
+ "epoch": 3.04,
96
+ "learning_rate": 1.5946666666666668e-05,
97
+ "loss": 1.9915,
98
+ "step": 76
99
+ },
100
+ {
101
+ "epoch": 3.04,
102
+ "eval_accuracy": 0.43,
103
+ "eval_disk_space_total": 78.1898422241211,
104
+ "eval_disk_space_used": 40.663944244384766,
105
+ "eval_f1": 0.4147692106558386,
106
+ "eval_gpu_ram_allocated": 4.172744274139404,
107
+ "eval_gpu_ram_cached": 26.771484375,
108
+ "eval_gpu_ram_total": 39.56402587890625,
109
+ "eval_gpu_utilization": 50,
110
+ "eval_loss": 1.8477184772491455,
111
+ "eval_precision": 0.5039625709645932,
112
+ "eval_recall": 0.43,
113
+ "eval_runtime": 0.3674,
114
+ "eval_samples_per_second": 544.402,
115
+ "eval_steps_per_second": 19.054,
116
+ "eval_system_ram_total": 83.48074722290039,
117
+ "eval_system_ram_used": 5.174091339111328,
118
+ "step": 76
119
+ },
120
+ {
121
+ "epoch": 3.8,
122
+ "learning_rate": 1.4933333333333335e-05,
123
+ "loss": 1.684,
124
+ "step": 95
125
+ },
126
+ {
127
+ "epoch": 3.8,
128
+ "eval_accuracy": 0.55,
129
+ "eval_disk_space_total": 78.1898422241211,
130
+ "eval_disk_space_used": 40.663944244384766,
131
+ "eval_f1": 0.5271704828853498,
132
+ "eval_gpu_ram_allocated": 4.172764301300049,
133
+ "eval_gpu_ram_cached": 26.771484375,
134
+ "eval_gpu_ram_total": 39.56402587890625,
135
+ "eval_gpu_utilization": 47,
136
+ "eval_loss": 1.6027369499206543,
137
+ "eval_precision": 0.5665937491057825,
138
+ "eval_recall": 0.55,
139
+ "eval_runtime": 0.3642,
140
+ "eval_samples_per_second": 549.083,
141
+ "eval_steps_per_second": 19.218,
142
+ "eval_system_ram_total": 83.48074722290039,
143
+ "eval_system_ram_used": 5.1766204833984375,
144
+ "step": 95
145
+ },
146
+ {
147
+ "epoch": 4.56,
148
+ "learning_rate": 1.392e-05,
149
+ "loss": 1.3911,
150
+ "step": 114
151
+ },
152
+ {
153
+ "epoch": 4.56,
154
+ "eval_accuracy": 0.615,
155
+ "eval_disk_space_total": 78.1898422241211,
156
+ "eval_disk_space_used": 40.66395950317383,
157
+ "eval_f1": 0.6060242411805659,
158
+ "eval_gpu_ram_allocated": 4.172757148742676,
159
+ "eval_gpu_ram_cached": 26.771484375,
160
+ "eval_gpu_ram_total": 39.56402587890625,
161
+ "eval_gpu_utilization": 49,
162
+ "eval_loss": 1.4364641904830933,
163
+ "eval_precision": 0.619891761052247,
164
+ "eval_recall": 0.615,
165
+ "eval_runtime": 0.3684,
166
+ "eval_samples_per_second": 542.848,
167
+ "eval_steps_per_second": 19.0,
168
+ "eval_system_ram_total": 83.48074722290039,
169
+ "eval_system_ram_used": 5.174568176269531,
170
+ "step": 114
171
+ },
172
+ {
173
+ "epoch": 5.32,
174
+ "learning_rate": 1.2906666666666668e-05,
175
+ "loss": 1.1477,
176
+ "step": 133
177
+ },
178
+ {
179
+ "epoch": 5.32,
180
+ "eval_accuracy": 0.615,
181
+ "eval_disk_space_total": 78.1898422241211,
182
+ "eval_disk_space_used": 40.663963317871094,
183
+ "eval_f1": 0.6215457086252978,
184
+ "eval_gpu_ram_allocated": 4.172830104827881,
185
+ "eval_gpu_ram_cached": 26.771484375,
186
+ "eval_gpu_ram_total": 39.56402587890625,
187
+ "eval_gpu_utilization": 52,
188
+ "eval_loss": 1.2565349340438843,
189
+ "eval_precision": 0.6418622536733991,
190
+ "eval_recall": 0.615,
191
+ "eval_runtime": 0.3792,
192
+ "eval_samples_per_second": 527.473,
193
+ "eval_steps_per_second": 18.462,
194
+ "eval_system_ram_total": 83.48074722290039,
195
+ "eval_system_ram_used": 5.1585845947265625,
196
+ "step": 133
197
+ },
198
+ {
199
+ "epoch": 6.08,
200
+ "learning_rate": 1.1893333333333335e-05,
201
+ "loss": 0.9198,
202
+ "step": 152
203
+ },
204
+ {
205
+ "epoch": 6.08,
206
+ "eval_accuracy": 0.64,
207
+ "eval_disk_space_total": 78.1898422241211,
208
+ "eval_disk_space_used": 40.66396713256836,
209
+ "eval_f1": 0.6399837944918036,
210
+ "eval_gpu_ram_allocated": 4.172749996185303,
211
+ "eval_gpu_ram_cached": 26.771484375,
212
+ "eval_gpu_ram_total": 39.56402587890625,
213
+ "eval_gpu_utilization": 55,
214
+ "eval_loss": 1.175949215888977,
215
+ "eval_precision": 0.6532184905737537,
216
+ "eval_recall": 0.64,
217
+ "eval_runtime": 0.3814,
218
+ "eval_samples_per_second": 524.426,
219
+ "eval_steps_per_second": 18.355,
220
+ "eval_system_ram_total": 83.48074722290039,
221
+ "eval_system_ram_used": 5.180980682373047,
222
+ "step": 152
223
+ },
224
+ {
225
+ "epoch": 6.84,
226
+ "learning_rate": 1.0880000000000001e-05,
227
+ "loss": 0.7605,
228
+ "step": 171
229
+ },
230
+ {
231
+ "epoch": 6.84,
232
+ "eval_accuracy": 0.645,
233
+ "eval_disk_space_total": 78.1898422241211,
234
+ "eval_disk_space_used": 40.66401672363281,
235
+ "eval_f1": 0.6418225015596317,
236
+ "eval_gpu_ram_allocated": 4.1727471351623535,
237
+ "eval_gpu_ram_cached": 26.771484375,
238
+ "eval_gpu_ram_total": 39.56402587890625,
239
+ "eval_gpu_utilization": 45,
240
+ "eval_loss": 1.112830638885498,
241
+ "eval_precision": 0.6564146498179483,
242
+ "eval_recall": 0.645,
243
+ "eval_runtime": 0.3808,
244
+ "eval_samples_per_second": 525.141,
245
+ "eval_steps_per_second": 18.38,
246
+ "eval_system_ram_total": 83.48074722290039,
247
+ "eval_system_ram_used": 5.141529083251953,
248
+ "step": 171
249
+ },
250
+ {
251
+ "epoch": 7.6,
252
+ "learning_rate": 9.866666666666668e-06,
253
+ "loss": 0.6093,
254
+ "step": 190
255
+ },
256
+ {
257
+ "epoch": 7.6,
258
+ "eval_accuracy": 0.67,
259
+ "eval_disk_space_total": 78.1898422241211,
260
+ "eval_disk_space_used": 40.66403579711914,
261
+ "eval_f1": 0.6677676797410276,
262
+ "eval_gpu_ram_allocated": 4.172770023345947,
263
+ "eval_gpu_ram_cached": 26.771484375,
264
+ "eval_gpu_ram_total": 39.56402587890625,
265
+ "eval_gpu_utilization": 43,
266
+ "eval_loss": 1.0766719579696655,
267
+ "eval_precision": 0.6757531007004691,
268
+ "eval_recall": 0.67,
269
+ "eval_runtime": 0.383,
270
+ "eval_samples_per_second": 522.188,
271
+ "eval_steps_per_second": 18.277,
272
+ "eval_system_ram_total": 83.48074722290039,
273
+ "eval_system_ram_used": 5.134746551513672,
274
+ "step": 190
275
+ },
276
+ {
277
+ "epoch": 8.36,
278
+ "learning_rate": 8.853333333333334e-06,
279
+ "loss": 0.5111,
280
+ "step": 209
281
+ },
282
+ {
283
+ "epoch": 8.36,
284
+ "eval_accuracy": 0.655,
285
+ "eval_disk_space_total": 78.1898422241211,
286
+ "eval_disk_space_used": 40.6640510559082,
287
+ "eval_f1": 0.655230999157746,
288
+ "eval_gpu_ram_allocated": 4.1727728843688965,
289
+ "eval_gpu_ram_cached": 26.771484375,
290
+ "eval_gpu_ram_total": 39.56402587890625,
291
+ "eval_gpu_utilization": 52,
292
+ "eval_loss": 1.1033188104629517,
293
+ "eval_precision": 0.6741666516629499,
294
+ "eval_recall": 0.655,
295
+ "eval_runtime": 0.369,
296
+ "eval_samples_per_second": 541.937,
297
+ "eval_steps_per_second": 18.968,
298
+ "eval_system_ram_total": 83.48074722290039,
299
+ "eval_system_ram_used": 5.120639801025391,
300
+ "step": 209
301
+ },
302
+ {
303
+ "epoch": 9.12,
304
+ "learning_rate": 7.840000000000001e-06,
305
+ "loss": 0.3828,
306
+ "step": 228
307
+ },
308
+ {
309
+ "epoch": 9.12,
310
+ "eval_accuracy": 0.69,
311
+ "eval_disk_space_total": 78.1898422241211,
312
+ "eval_disk_space_used": 40.664058685302734,
313
+ "eval_f1": 0.6874757485635422,
314
+ "eval_gpu_ram_allocated": 4.17274284362793,
315
+ "eval_gpu_ram_cached": 26.771484375,
316
+ "eval_gpu_ram_total": 39.56402587890625,
317
+ "eval_gpu_utilization": 44,
318
+ "eval_loss": 1.1062885522842407,
319
+ "eval_precision": 0.692681623931624,
320
+ "eval_recall": 0.69,
321
+ "eval_runtime": 0.373,
322
+ "eval_samples_per_second": 536.14,
323
+ "eval_steps_per_second": 18.765,
324
+ "eval_system_ram_total": 83.48074722290039,
325
+ "eval_system_ram_used": 5.148380279541016,
326
+ "step": 228
327
+ },
328
+ {
329
+ "epoch": 9.88,
330
+ "learning_rate": 6.826666666666667e-06,
331
+ "loss": 0.3082,
332
+ "step": 247
333
+ },
334
+ {
335
+ "epoch": 9.88,
336
+ "eval_accuracy": 0.665,
337
+ "eval_disk_space_total": 78.1898422241211,
338
+ "eval_disk_space_used": 40.664058685302734,
339
+ "eval_f1": 0.657310556170902,
340
+ "eval_gpu_ram_allocated": 4.172765731811523,
341
+ "eval_gpu_ram_cached": 26.771484375,
342
+ "eval_gpu_ram_total": 39.56402587890625,
343
+ "eval_gpu_utilization": 45,
344
+ "eval_loss": 1.1239553689956665,
345
+ "eval_precision": 0.6595053580899589,
346
+ "eval_recall": 0.665,
347
+ "eval_runtime": 0.3747,
348
+ "eval_samples_per_second": 533.805,
349
+ "eval_steps_per_second": 18.683,
350
+ "eval_system_ram_total": 83.48074722290039,
351
+ "eval_system_ram_used": 5.143707275390625,
352
+ "step": 247
353
+ },
354
+ {
355
+ "epoch": 10.64,
356
+ "learning_rate": 5.813333333333334e-06,
357
+ "loss": 0.2716,
358
+ "step": 266
359
+ },
360
+ {
361
+ "epoch": 10.64,
362
+ "eval_accuracy": 0.665,
363
+ "eval_disk_space_total": 78.1898422241211,
364
+ "eval_disk_space_used": 40.66410827636719,
365
+ "eval_f1": 0.6603758535972988,
366
+ "eval_gpu_ram_allocated": 4.172775745391846,
367
+ "eval_gpu_ram_cached": 26.771484375,
368
+ "eval_gpu_ram_total": 39.56402587890625,
369
+ "eval_gpu_utilization": 45,
370
+ "eval_loss": 1.157199501991272,
371
+ "eval_precision": 0.666485053212742,
372
+ "eval_recall": 0.665,
373
+ "eval_runtime": 0.3689,
374
+ "eval_samples_per_second": 542.088,
375
+ "eval_steps_per_second": 18.973,
376
+ "eval_system_ram_total": 83.48074722290039,
377
+ "eval_system_ram_used": 5.068927764892578,
378
+ "step": 266
379
+ },
380
+ {
381
+ "epoch": 11.4,
382
+ "learning_rate": 4.800000000000001e-06,
383
+ "loss": 0.2442,
384
+ "step": 285
385
+ },
386
+ {
387
+ "epoch": 11.4,
388
+ "eval_accuracy": 0.675,
389
+ "eval_disk_space_total": 78.1898422241211,
390
+ "eval_disk_space_used": 40.66411209106445,
391
+ "eval_f1": 0.6764825046795931,
392
+ "eval_gpu_ram_allocated": 4.17277717590332,
393
+ "eval_gpu_ram_cached": 26.771484375,
394
+ "eval_gpu_ram_total": 39.56402587890625,
395
+ "eval_gpu_utilization": 42,
396
+ "eval_loss": 1.1057575941085815,
397
+ "eval_precision": 0.6826701222753855,
398
+ "eval_recall": 0.675,
399
+ "eval_runtime": 0.3756,
400
+ "eval_samples_per_second": 532.455,
401
+ "eval_steps_per_second": 18.636,
402
+ "eval_system_ram_total": 83.48074722290039,
403
+ "eval_system_ram_used": 5.031635284423828,
404
+ "step": 285
405
+ },
406
+ {
407
+ "epoch": 12.16,
408
+ "learning_rate": 3.7866666666666667e-06,
409
+ "loss": 0.1791,
410
+ "step": 304
411
+ },
412
+ {
413
+ "epoch": 12.16,
414
+ "eval_accuracy": 0.645,
415
+ "eval_disk_space_total": 78.1898422241211,
416
+ "eval_disk_space_used": 40.664119720458984,
417
+ "eval_f1": 0.6445427433817439,
418
+ "eval_gpu_ram_allocated": 4.172751426696777,
419
+ "eval_gpu_ram_cached": 26.771484375,
420
+ "eval_gpu_ram_total": 39.56402587890625,
421
+ "eval_gpu_utilization": 46,
422
+ "eval_loss": 1.1455398797988892,
423
+ "eval_precision": 0.6514674297225784,
424
+ "eval_recall": 0.645,
425
+ "eval_runtime": 0.37,
426
+ "eval_samples_per_second": 540.499,
427
+ "eval_steps_per_second": 18.917,
428
+ "eval_system_ram_total": 83.48074722290039,
429
+ "eval_system_ram_used": 5.071483612060547,
430
+ "step": 304
431
+ },
432
+ {
433
+ "epoch": 12.92,
434
+ "learning_rate": 2.7733333333333336e-06,
435
+ "loss": 0.1604,
436
+ "step": 323
437
+ },
438
+ {
439
+ "epoch": 12.92,
440
+ "eval_accuracy": 0.66,
441
+ "eval_disk_space_total": 78.1898422241211,
442
+ "eval_disk_space_used": 40.66413116455078,
443
+ "eval_f1": 0.6578390320866488,
444
+ "eval_gpu_ram_allocated": 4.172774314880371,
445
+ "eval_gpu_ram_cached": 26.771484375,
446
+ "eval_gpu_ram_total": 39.56402587890625,
447
+ "eval_gpu_utilization": 57,
448
+ "eval_loss": 1.1514325141906738,
449
+ "eval_precision": 0.6686316887895163,
450
+ "eval_recall": 0.66,
451
+ "eval_runtime": 0.368,
452
+ "eval_samples_per_second": 543.448,
453
+ "eval_steps_per_second": 19.021,
454
+ "eval_system_ram_total": 83.48074722290039,
455
+ "eval_system_ram_used": 5.0727691650390625,
456
+ "step": 323
457
+ },
458
+ {
459
+ "epoch": 13.68,
460
+ "learning_rate": 1.76e-06,
461
+ "loss": 0.1389,
462
+ "step": 342
463
+ },
464
+ {
465
+ "epoch": 13.68,
466
+ "eval_accuracy": 0.675,
467
+ "eval_disk_space_total": 78.1898422241211,
468
+ "eval_disk_space_used": 40.66417694091797,
469
+ "eval_f1": 0.6714518778998741,
470
+ "eval_gpu_ram_allocated": 4.172738552093506,
471
+ "eval_gpu_ram_cached": 26.771484375,
472
+ "eval_gpu_ram_total": 39.56402587890625,
473
+ "eval_gpu_utilization": 48,
474
+ "eval_loss": 1.160007357597351,
475
+ "eval_precision": 0.6807936997642879,
476
+ "eval_recall": 0.675,
477
+ "eval_runtime": 0.3735,
478
+ "eval_samples_per_second": 535.495,
479
+ "eval_steps_per_second": 18.742,
480
+ "eval_system_ram_total": 83.48074722290039,
481
+ "eval_system_ram_used": 5.065456390380859,
482
+ "step": 342
483
+ },
484
+ {
485
+ "epoch": 14.44,
486
+ "learning_rate": 7.466666666666668e-07,
487
+ "loss": 0.151,
488
+ "step": 361
489
+ },
490
+ {
491
+ "epoch": 14.44,
492
+ "eval_accuracy": 0.665,
493
+ "eval_disk_space_total": 78.1898422241211,
494
+ "eval_disk_space_used": 40.664188385009766,
495
+ "eval_f1": 0.6625899013665082,
496
+ "eval_gpu_ram_allocated": 4.172741413116455,
497
+ "eval_gpu_ram_cached": 26.771484375,
498
+ "eval_gpu_ram_total": 39.56402587890625,
499
+ "eval_gpu_utilization": 48,
500
+ "eval_loss": 1.1573272943496704,
501
+ "eval_precision": 0.6686724234321488,
502
+ "eval_recall": 0.665,
503
+ "eval_runtime": 0.3721,
504
+ "eval_samples_per_second": 537.558,
505
+ "eval_steps_per_second": 18.815,
506
+ "eval_system_ram_total": 83.48074722290039,
507
+ "eval_system_ram_used": 5.058799743652344,
508
+ "step": 361
509
+ }
510
+ ],
511
+ "max_steps": 375,
512
+ "num_train_epochs": 15,
513
+ "total_flos": 825897508635264.0,
514
+ "trial_name": null,
515
+ "trial_params": null
516
+ }
checkpoint-375/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f706732951169848f8127a8491def3af20921df42d30cfab655bcd2fbe3ce7
3
+ size 4091
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a37c7031758a4eed7cf4c6318896b5e93e57169ad17514c2908814b2511d8869
3
  size 737788917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffc4b810698b3fcb8652ef309273ad4a6eee6a8b551f0cf6d1cbcc23c8eb1803
3
  size 737788917
runs/Jul22_09-22-43_549e78cb1c68/events.out.tfevents.1690017769.549e78cb1c68.4107.2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a3756929d2c70a56a010d397a7a2210b9f18c2837aca92ea7c8de9baeeb284d
3
- size 6194
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dd61c877e41419fbc01111d461dfaffcb6b789b2b80f7175a1e3b0896ef0406
3
+ size 26284
runs/Jul22_09-22-43_549e78cb1c68/events.out.tfevents.1690017926.549e78cb1c68.4107.3 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a0e799275b2b89f5dc2cb436ad443f357c6c29221e10e3c030e9ff5198768ee0
3
+ size 1033