MUHAMMADSAADAMIN commited on
Commit
9b5a75b
·
verified ·
1 Parent(s): 5d54242

Upload folder using huggingface_hub

Browse files
config.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_cross_attention": false,
3
+ "architectures": [
4
+ "RobertaForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "dtype": "float32",
10
+ "eos_token_id": 2,
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "initializer_range": 0.02,
15
+ "intermediate_size": 3072,
16
+ "is_decoder": false,
17
+ "layer_norm_eps": 1e-05,
18
+ "max_position_embeddings": 514,
19
+ "model_type": "roberta",
20
+ "num_attention_heads": 12,
21
+ "num_hidden_layers": 12,
22
+ "output_past": true,
23
+ "pad_token_id": 1,
24
+ "problem_type": "single_label_classification",
25
+ "tie_word_embeddings": true,
26
+ "transformers_version": "5.0.0",
27
+ "type_vocab_size": 1,
28
+ "use_cache": false,
29
+ "vocab_size": 50265
30
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:209e18b1560b4629e233cb8f109fc79fe8783636765d1587b501d3d9d6d04632
3
+ size 498612800
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8875c9eda4a9444a1d3d0f08f68b7a72f17ba43003aa9550550c60e286f2078f
3
+ size 997348747
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e35fdb8910d426c9c16878ea3078dc0568d86e073d0a7f891b2f09ffe5b5b22e
3
+ size 14645
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c75de975c68bbeb8ce37282f26021c4e18754c36408856eaa382dc6627e5aaf3
3
+ size 1465
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "errors": "replace",
8
+ "is_local": true,
9
+ "mask_token": "<mask>",
10
+ "model_max_length": 512,
11
+ "pad_token": "<pad>",
12
+ "sep_token": "</s>",
13
+ "tokenizer_class": "RobertaTokenizer",
14
+ "trim_offsets": true,
15
+ "unk_token": "<unk>"
16
+ }
trainer_state.json ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 4000,
3
+ "best_metric": 0.6931638121604919,
4
+ "best_model_checkpoint": "/content/drive/MyDrive/PolyGuard/model_final/checkpoint-4000",
5
+ "epoch": 2.0,
6
+ "eval_steps": 500,
7
+ "global_step": 8000,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.025,
14
+ "grad_norm": 4.840836524963379,
15
+ "learning_rate": 2.4750000000000002e-05,
16
+ "loss": 0.6476753997802734,
17
+ "step": 100
18
+ },
19
+ {
20
+ "epoch": 0.05,
21
+ "grad_norm": 3.3573901653289795,
22
+ "learning_rate": 4.975e-05,
23
+ "loss": 0.6003963470458984,
24
+ "step": 200
25
+ },
26
+ {
27
+ "epoch": 0.075,
28
+ "grad_norm": 5.3957953453063965,
29
+ "learning_rate": 4.96867088607595e-05,
30
+ "loss": 0.5752050399780273,
31
+ "step": 300
32
+ },
33
+ {
34
+ "epoch": 0.1,
35
+ "grad_norm": 6.348972797393799,
36
+ "learning_rate": 4.937025316455696e-05,
37
+ "loss": 0.5668778610229492,
38
+ "step": 400
39
+ },
40
+ {
41
+ "epoch": 0.125,
42
+ "grad_norm": 3.777247428894043,
43
+ "learning_rate": 4.905379746835443e-05,
44
+ "loss": 0.6448370361328125,
45
+ "step": 500
46
+ },
47
+ {
48
+ "epoch": 0.15,
49
+ "grad_norm": 5.974273204803467,
50
+ "learning_rate": 4.87373417721519e-05,
51
+ "loss": 0.5914559173583984,
52
+ "step": 600
53
+ },
54
+ {
55
+ "epoch": 0.175,
56
+ "grad_norm": 5.3988165855407715,
57
+ "learning_rate": 4.842088607594937e-05,
58
+ "loss": 0.6363700103759765,
59
+ "step": 700
60
+ },
61
+ {
62
+ "epoch": 0.2,
63
+ "grad_norm": 2.299058675765991,
64
+ "learning_rate": 4.810443037974684e-05,
65
+ "loss": 0.5667419815063477,
66
+ "step": 800
67
+ },
68
+ {
69
+ "epoch": 0.225,
70
+ "grad_norm": 3.1783978939056396,
71
+ "learning_rate": 4.7787974683544305e-05,
72
+ "loss": 0.5284980392456055,
73
+ "step": 900
74
+ },
75
+ {
76
+ "epoch": 0.25,
77
+ "grad_norm": 3.9692907333374023,
78
+ "learning_rate": 4.747151898734177e-05,
79
+ "loss": 0.56060546875,
80
+ "step": 1000
81
+ },
82
+ {
83
+ "epoch": 0.275,
84
+ "grad_norm": 2.46657657623291,
85
+ "learning_rate": 4.715506329113925e-05,
86
+ "loss": 0.5778974151611328,
87
+ "step": 1100
88
+ },
89
+ {
90
+ "epoch": 0.3,
91
+ "grad_norm": 1.4161385297775269,
92
+ "learning_rate": 4.683860759493671e-05,
93
+ "loss": 0.5550444412231446,
94
+ "step": 1200
95
+ },
96
+ {
97
+ "epoch": 0.325,
98
+ "grad_norm": 6.127955436706543,
99
+ "learning_rate": 4.652215189873418e-05,
100
+ "loss": 0.638050308227539,
101
+ "step": 1300
102
+ },
103
+ {
104
+ "epoch": 0.35,
105
+ "grad_norm": 2.9411966800689697,
106
+ "learning_rate": 4.620569620253164e-05,
107
+ "loss": 0.5464860153198242,
108
+ "step": 1400
109
+ },
110
+ {
111
+ "epoch": 0.375,
112
+ "grad_norm": 5.668337345123291,
113
+ "learning_rate": 4.588924050632912e-05,
114
+ "loss": 0.6324460220336914,
115
+ "step": 1500
116
+ },
117
+ {
118
+ "epoch": 0.4,
119
+ "grad_norm": 5.304285049438477,
120
+ "learning_rate": 4.5572784810126585e-05,
121
+ "loss": 0.5795536041259766,
122
+ "step": 1600
123
+ },
124
+ {
125
+ "epoch": 0.425,
126
+ "grad_norm": 1.6371102333068848,
127
+ "learning_rate": 4.525632911392405e-05,
128
+ "loss": 0.5774750137329101,
129
+ "step": 1700
130
+ },
131
+ {
132
+ "epoch": 0.45,
133
+ "grad_norm": 1.4340555667877197,
134
+ "learning_rate": 4.493987341772152e-05,
135
+ "loss": 0.6004017257690429,
136
+ "step": 1800
137
+ },
138
+ {
139
+ "epoch": 0.475,
140
+ "grad_norm": 2.997642755508423,
141
+ "learning_rate": 4.462341772151899e-05,
142
+ "loss": 0.6037093734741211,
143
+ "step": 1900
144
+ },
145
+ {
146
+ "epoch": 0.5,
147
+ "grad_norm": 1.124809741973877,
148
+ "learning_rate": 4.430696202531646e-05,
149
+ "loss": 0.5478248596191406,
150
+ "step": 2000
151
+ },
152
+ {
153
+ "epoch": 0.525,
154
+ "grad_norm": 1.4012881517410278,
155
+ "learning_rate": 4.399050632911393e-05,
156
+ "loss": 0.5746703720092774,
157
+ "step": 2100
158
+ },
159
+ {
160
+ "epoch": 0.55,
161
+ "grad_norm": 2.774062395095825,
162
+ "learning_rate": 4.367405063291139e-05,
163
+ "loss": 0.5518299865722657,
164
+ "step": 2200
165
+ },
166
+ {
167
+ "epoch": 0.575,
168
+ "grad_norm": 0.9460004568099976,
169
+ "learning_rate": 4.3357594936708864e-05,
170
+ "loss": 0.5653076553344727,
171
+ "step": 2300
172
+ },
173
+ {
174
+ "epoch": 0.6,
175
+ "grad_norm": 4.564599514007568,
176
+ "learning_rate": 4.304113924050633e-05,
177
+ "loss": 0.626568832397461,
178
+ "step": 2400
179
+ },
180
+ {
181
+ "epoch": 0.625,
182
+ "grad_norm": 0.9103949666023254,
183
+ "learning_rate": 4.27246835443038e-05,
184
+ "loss": 0.5939551544189453,
185
+ "step": 2500
186
+ },
187
+ {
188
+ "epoch": 0.65,
189
+ "grad_norm": 3.449150800704956,
190
+ "learning_rate": 4.2408227848101265e-05,
191
+ "loss": 0.5971554946899414,
192
+ "step": 2600
193
+ },
194
+ {
195
+ "epoch": 0.675,
196
+ "grad_norm": 3.5549769401550293,
197
+ "learning_rate": 4.2091772151898736e-05,
198
+ "loss": 0.5751391983032227,
199
+ "step": 2700
200
+ },
201
+ {
202
+ "epoch": 0.7,
203
+ "grad_norm": 1.6542292833328247,
204
+ "learning_rate": 4.177531645569621e-05,
205
+ "loss": 0.5986330032348632,
206
+ "step": 2800
207
+ },
208
+ {
209
+ "epoch": 0.725,
210
+ "grad_norm": 1.3074430227279663,
211
+ "learning_rate": 4.145886075949367e-05,
212
+ "loss": 0.5717515182495118,
213
+ "step": 2900
214
+ },
215
+ {
216
+ "epoch": 0.75,
217
+ "grad_norm": 3.095973014831543,
218
+ "learning_rate": 4.114240506329114e-05,
219
+ "loss": 0.5813043594360352,
220
+ "step": 3000
221
+ },
222
+ {
223
+ "epoch": 0.775,
224
+ "grad_norm": 2.611328363418579,
225
+ "learning_rate": 4.0825949367088615e-05,
226
+ "loss": 0.6051469039916992,
227
+ "step": 3100
228
+ },
229
+ {
230
+ "epoch": 0.8,
231
+ "grad_norm": 2.4239795207977295,
232
+ "learning_rate": 4.050949367088608e-05,
233
+ "loss": 0.5933720779418945,
234
+ "step": 3200
235
+ },
236
+ {
237
+ "epoch": 0.825,
238
+ "grad_norm": 1.3653069734573364,
239
+ "learning_rate": 4.0193037974683544e-05,
240
+ "loss": 0.5751293182373047,
241
+ "step": 3300
242
+ },
243
+ {
244
+ "epoch": 0.85,
245
+ "grad_norm": 1.4362722635269165,
246
+ "learning_rate": 3.9876582278481015e-05,
247
+ "loss": 0.5673767852783204,
248
+ "step": 3400
249
+ },
250
+ {
251
+ "epoch": 0.875,
252
+ "grad_norm": 1.1265358924865723,
253
+ "learning_rate": 3.956012658227849e-05,
254
+ "loss": 0.6048561477661133,
255
+ "step": 3500
256
+ },
257
+ {
258
+ "epoch": 0.9,
259
+ "grad_norm": 1.3712810277938843,
260
+ "learning_rate": 3.924367088607595e-05,
261
+ "loss": 0.6653845977783203,
262
+ "step": 3600
263
+ },
264
+ {
265
+ "epoch": 0.925,
266
+ "grad_norm": 1.213412880897522,
267
+ "learning_rate": 3.8927215189873416e-05,
268
+ "loss": 0.6943452453613281,
269
+ "step": 3700
270
+ },
271
+ {
272
+ "epoch": 0.95,
273
+ "grad_norm": 2.019483804702759,
274
+ "learning_rate": 3.861075949367089e-05,
275
+ "loss": 0.6974296569824219,
276
+ "step": 3800
277
+ },
278
+ {
279
+ "epoch": 0.975,
280
+ "grad_norm": 1.273471713066101,
281
+ "learning_rate": 3.829430379746836e-05,
282
+ "loss": 0.706220474243164,
283
+ "step": 3900
284
+ },
285
+ {
286
+ "epoch": 1.0,
287
+ "grad_norm": 2.3656868934631348,
288
+ "learning_rate": 3.7977848101265823e-05,
289
+ "loss": 0.702726058959961,
290
+ "step": 4000
291
+ },
292
+ {
293
+ "epoch": 1.0,
294
+ "eval_loss": 0.6931638121604919,
295
+ "eval_runtime": 166.9577,
296
+ "eval_samples_per_second": 47.916,
297
+ "eval_steps_per_second": 5.99,
298
+ "step": 4000
299
+ },
300
+ {
301
+ "epoch": 1.025,
302
+ "grad_norm": 1.4108144044876099,
303
+ "learning_rate": 3.7661392405063295e-05,
304
+ "loss": 0.6999430847167969,
305
+ "step": 4100
306
+ },
307
+ {
308
+ "epoch": 1.05,
309
+ "grad_norm": 2.259119987487793,
310
+ "learning_rate": 3.734493670886076e-05,
311
+ "loss": 0.7005876922607421,
312
+ "step": 4200
313
+ },
314
+ {
315
+ "epoch": 1.075,
316
+ "grad_norm": 0.8190616965293884,
317
+ "learning_rate": 3.702848101265823e-05,
318
+ "loss": 0.6983786010742188,
319
+ "step": 4300
320
+ },
321
+ {
322
+ "epoch": 1.1,
323
+ "grad_norm": 1.3949053287506104,
324
+ "learning_rate": 3.67120253164557e-05,
325
+ "loss": 0.6970309448242188,
326
+ "step": 4400
327
+ },
328
+ {
329
+ "epoch": 1.125,
330
+ "grad_norm": 1.9007196426391602,
331
+ "learning_rate": 3.639556962025317e-05,
332
+ "loss": 0.7024803161621094,
333
+ "step": 4500
334
+ },
335
+ {
336
+ "epoch": 1.15,
337
+ "grad_norm": 3.2963850498199463,
338
+ "learning_rate": 3.607911392405063e-05,
339
+ "loss": 0.6969153594970703,
340
+ "step": 4600
341
+ },
342
+ {
343
+ "epoch": 1.175,
344
+ "grad_norm": 2.8197951316833496,
345
+ "learning_rate": 3.57626582278481e-05,
346
+ "loss": 0.6913418579101562,
347
+ "step": 4700
348
+ },
349
+ {
350
+ "epoch": 1.2,
351
+ "grad_norm": 3.7260422706604004,
352
+ "learning_rate": 3.5446202531645574e-05,
353
+ "loss": 0.6954251098632812,
354
+ "step": 4800
355
+ },
356
+ {
357
+ "epoch": 1.225,
358
+ "grad_norm": 2.538835048675537,
359
+ "learning_rate": 3.512974683544304e-05,
360
+ "loss": 0.7016423797607422,
361
+ "step": 4900
362
+ },
363
+ {
364
+ "epoch": 1.25,
365
+ "grad_norm": 1.9548262357711792,
366
+ "learning_rate": 3.48132911392405e-05,
367
+ "loss": 0.6976743316650391,
368
+ "step": 5000
369
+ },
370
+ {
371
+ "epoch": 1.275,
372
+ "grad_norm": 4.0937180519104,
373
+ "learning_rate": 3.4496835443037975e-05,
374
+ "loss": 0.700084228515625,
375
+ "step": 5100
376
+ },
377
+ {
378
+ "epoch": 1.3,
379
+ "grad_norm": 2.521569013595581,
380
+ "learning_rate": 3.4180379746835446e-05,
381
+ "loss": 0.6976382446289062,
382
+ "step": 5200
383
+ },
384
+ {
385
+ "epoch": 1.325,
386
+ "grad_norm": 4.047502517700195,
387
+ "learning_rate": 3.386392405063291e-05,
388
+ "loss": 0.6967656707763672,
389
+ "step": 5300
390
+ },
391
+ {
392
+ "epoch": 1.35,
393
+ "grad_norm": 3.4342639446258545,
394
+ "learning_rate": 3.354746835443038e-05,
395
+ "loss": 0.6955546569824219,
396
+ "step": 5400
397
+ },
398
+ {
399
+ "epoch": 1.375,
400
+ "grad_norm": 1.5393496751785278,
401
+ "learning_rate": 3.3231012658227854e-05,
402
+ "loss": 0.6965518951416015,
403
+ "step": 5500
404
+ },
405
+ {
406
+ "epoch": 1.4,
407
+ "grad_norm": 4.321380138397217,
408
+ "learning_rate": 3.291455696202532e-05,
409
+ "loss": 0.6958496856689453,
410
+ "step": 5600
411
+ },
412
+ {
413
+ "epoch": 1.425,
414
+ "grad_norm": 1.1191785335540771,
415
+ "learning_rate": 3.259810126582279e-05,
416
+ "loss": 0.6999098968505859,
417
+ "step": 5700
418
+ },
419
+ {
420
+ "epoch": 1.45,
421
+ "grad_norm": 0.8614036440849304,
422
+ "learning_rate": 3.2281645569620254e-05,
423
+ "loss": 0.6959712219238281,
424
+ "step": 5800
425
+ },
426
+ {
427
+ "epoch": 1.475,
428
+ "grad_norm": 0.9664958715438843,
429
+ "learning_rate": 3.1965189873417725e-05,
430
+ "loss": 0.6925695037841797,
431
+ "step": 5900
432
+ },
433
+ {
434
+ "epoch": 1.5,
435
+ "grad_norm": 2.1453211307525635,
436
+ "learning_rate": 3.164873417721519e-05,
437
+ "loss": 0.698175048828125,
438
+ "step": 6000
439
+ },
440
+ {
441
+ "epoch": 1.525,
442
+ "grad_norm": 2.1440930366516113,
443
+ "learning_rate": 3.133227848101266e-05,
444
+ "loss": 0.682925796508789,
445
+ "step": 6100
446
+ },
447
+ {
448
+ "epoch": 1.55,
449
+ "grad_norm": 0.8990124464035034,
450
+ "learning_rate": 3.1015822784810126e-05,
451
+ "loss": 0.6517456817626953,
452
+ "step": 6200
453
+ },
454
+ {
455
+ "epoch": 1.575,
456
+ "grad_norm": 1.3929342031478882,
457
+ "learning_rate": 3.06993670886076e-05,
458
+ "loss": 0.7006549835205078,
459
+ "step": 6300
460
+ },
461
+ {
462
+ "epoch": 1.6,
463
+ "grad_norm": 2.1912875175476074,
464
+ "learning_rate": 3.0382911392405065e-05,
465
+ "loss": 0.6975661468505859,
466
+ "step": 6400
467
+ },
468
+ {
469
+ "epoch": 1.625,
470
+ "grad_norm": 1.1694247722625732,
471
+ "learning_rate": 3.0066455696202533e-05,
472
+ "loss": 0.6955360412597656,
473
+ "step": 6500
474
+ },
475
+ {
476
+ "epoch": 1.65,
477
+ "grad_norm": 3.340589761734009,
478
+ "learning_rate": 2.975e-05,
479
+ "loss": 0.7008393859863281,
480
+ "step": 6600
481
+ },
482
+ {
483
+ "epoch": 1.675,
484
+ "grad_norm": 2.42465877532959,
485
+ "learning_rate": 2.9433544303797473e-05,
486
+ "loss": 0.6998232269287109,
487
+ "step": 6700
488
+ },
489
+ {
490
+ "epoch": 1.7,
491
+ "grad_norm": 1.9139105081558228,
492
+ "learning_rate": 2.9117088607594937e-05,
493
+ "loss": 0.6989698028564453,
494
+ "step": 6800
495
+ },
496
+ {
497
+ "epoch": 1.725,
498
+ "grad_norm": 0.7264005541801453,
499
+ "learning_rate": 2.8800632911392405e-05,
500
+ "loss": 0.6944959259033203,
501
+ "step": 6900
502
+ },
503
+ {
504
+ "epoch": 1.75,
505
+ "grad_norm": 1.1322827339172363,
506
+ "learning_rate": 2.8484177215189873e-05,
507
+ "loss": 0.7098442840576172,
508
+ "step": 7000
509
+ },
510
+ {
511
+ "epoch": 1.775,
512
+ "grad_norm": 2.150141477584839,
513
+ "learning_rate": 2.8167721518987345e-05,
514
+ "loss": 0.6946941375732422,
515
+ "step": 7100
516
+ },
517
+ {
518
+ "epoch": 1.8,
519
+ "grad_norm": 1.850095510482788,
520
+ "learning_rate": 2.785126582278481e-05,
521
+ "loss": 0.6941130065917969,
522
+ "step": 7200
523
+ },
524
+ {
525
+ "epoch": 1.825,
526
+ "grad_norm": 1.994320273399353,
527
+ "learning_rate": 2.7534810126582277e-05,
528
+ "loss": 0.6933687591552734,
529
+ "step": 7300
530
+ },
531
+ {
532
+ "epoch": 1.85,
533
+ "grad_norm": 1.1272798776626587,
534
+ "learning_rate": 2.721835443037975e-05,
535
+ "loss": 0.7010916900634766,
536
+ "step": 7400
537
+ },
538
+ {
539
+ "epoch": 1.875,
540
+ "grad_norm": 2.2662463188171387,
541
+ "learning_rate": 2.6901898734177217e-05,
542
+ "loss": 0.6951226806640625,
543
+ "step": 7500
544
+ },
545
+ {
546
+ "epoch": 1.9,
547
+ "grad_norm": 2.8019468784332275,
548
+ "learning_rate": 2.6585443037974685e-05,
549
+ "loss": 0.6966998291015625,
550
+ "step": 7600
551
+ },
552
+ {
553
+ "epoch": 1.925,
554
+ "grad_norm": 2.3949637413024902,
555
+ "learning_rate": 2.6268987341772156e-05,
556
+ "loss": 0.6956380462646484,
557
+ "step": 7700
558
+ },
559
+ {
560
+ "epoch": 1.95,
561
+ "grad_norm": 2.5100715160369873,
562
+ "learning_rate": 2.595253164556962e-05,
563
+ "loss": 0.6968719482421875,
564
+ "step": 7800
565
+ },
566
+ {
567
+ "epoch": 1.975,
568
+ "grad_norm": 6.460758209228516,
569
+ "learning_rate": 2.563607594936709e-05,
570
+ "loss": 0.692848892211914,
571
+ "step": 7900
572
+ },
573
+ {
574
+ "epoch": 2.0,
575
+ "grad_norm": 3.2004637718200684,
576
+ "learning_rate": 2.5319620253164557e-05,
577
+ "loss": 0.6972612762451171,
578
+ "step": 8000
579
+ },
580
+ {
581
+ "epoch": 2.0,
582
+ "eval_loss": 0.6932027339935303,
583
+ "eval_runtime": 166.3088,
584
+ "eval_samples_per_second": 48.103,
585
+ "eval_steps_per_second": 6.013,
586
+ "step": 8000
587
+ }
588
+ ],
589
+ "logging_steps": 100,
590
+ "max_steps": 16000,
591
+ "num_input_tokens_seen": 0,
592
+ "num_train_epochs": 4,
593
+ "save_steps": 500,
594
+ "stateful_callbacks": {
595
+ "TrainerControl": {
596
+ "args": {
597
+ "should_epoch_stop": false,
598
+ "should_evaluate": false,
599
+ "should_log": false,
600
+ "should_save": true,
601
+ "should_training_stop": false
602
+ },
603
+ "attributes": {}
604
+ }
605
+ },
606
+ "total_flos": 8419553771520000.0,
607
+ "train_batch_size": 8,
608
+ "trial_name": null,
609
+ "trial_params": null
610
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:144d97010bcda68f4d4b7151c2bcd2b1d3638f1a60032525b2ff2fdfd41fd895
3
+ size 5201
training_metadata.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trained_at": "2026-04-29T00:06:05.040980",
3
+ "base_model": "microsoft/codebert-base",
4
+ "train_samples": 16681,
5
+ "val_samples": 2275,
6
+ "best_f1": 0.6698,
7
+ "epochs_trained": 14,
8
+ "augmented": true,
9
+ "version": "v5_extended"
10
+ }