fursov commited on
Commit
c115b75
1 Parent(s): 774ddb7

End of training

Browse files
Files changed (5) hide show
  1. README.md +28 -7
  2. all_results.json +13 -13
  3. eval_results.json +9 -9
  4. train_results.json +5 -5
  5. trainer_state.json +318 -12
README.md CHANGED
@@ -3,6 +3,8 @@ license: mit
3
  base_model: roberta-base
4
  tags:
5
  - generated_from_trainer
 
 
6
  metrics:
7
  - precision
8
  - recall
@@ -10,7 +12,26 @@ metrics:
10
  - accuracy
11
  model-index:
12
  - name: ner-gec-roberta-v3
13
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
  ---
15
 
16
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -18,13 +39,13 @@ should probably proofread and complete it, then remove this comment. -->
18
 
19
  # ner-gec-roberta-v3
20
 
21
- This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on an unknown dataset.
22
  It achieves the following results on the evaluation set:
23
- - Loss: 0.1783
24
- - Precision: 0.5595
25
- - Recall: 0.4328
26
- - F1: 0.4880
27
- - Accuracy: 0.9554
28
 
29
  ## Model description
30
 
 
3
  base_model: roberta-base
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - fursov/gec_ner_val3
8
  metrics:
9
  - precision
10
  - recall
 
12
  - accuracy
13
  model-index:
14
  - name: ner-gec-roberta-v3
15
+ results:
16
+ - task:
17
+ name: Token Classification
18
+ type: token-classification
19
+ dataset:
20
+ name: fursov/gec_ner_val3
21
+ type: fursov/gec_ner_val3
22
+ metrics:
23
+ - name: Precision
24
+ type: precision
25
+ value: 0.5705440070765149
26
+ - name: Recall
27
+ type: recall
28
+ value: 0.43481191856545776
29
+ - name: F1
30
+ type: f1
31
+ value: 0.493515436703776
32
+ - name: Accuracy
33
+ type: accuracy
34
+ value: 0.9566099116988466
35
  ---
36
 
37
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
39
 
40
  # ner-gec-roberta-v3
41
 
42
+ This model is a fine-tuned version of [roberta-base](https://huggingface.co/roberta-base) on the fursov/gec_ner_val3 dataset.
43
  It achieves the following results on the evaluation set:
44
+ - Loss: 0.1759
45
+ - Precision: 0.5705
46
+ - Recall: 0.4348
47
+ - F1: 0.4935
48
+ - Accuracy: 0.9566
49
 
50
  ## Model description
51
 
all_results.json CHANGED
@@ -1,17 +1,17 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.9483607249989437,
4
- "eval_f1": 0.3637840032480715,
5
- "eval_loss": 0.18947799503803253,
6
- "eval_precision": 0.4573295222539812,
7
- "eval_recall": 0.30200889847647294,
8
- "eval_runtime": 7.5138,
9
  "eval_samples": 4000,
10
- "eval_samples_per_second": 532.352,
11
- "eval_steps_per_second": 66.544,
12
- "train_loss": 0.2075410250694521,
13
- "train_runtime": 306.0993,
14
  "train_samples": 55538,
15
- "train_samples_per_second": 907.189,
16
- "train_steps_per_second": 7.089
17
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9566099116988466,
4
+ "eval_f1": 0.493515436703776,
5
+ "eval_loss": 0.1758583039045334,
6
+ "eval_precision": 0.5705440070765149,
7
+ "eval_recall": 0.43481191856545776,
8
+ "eval_runtime": 7.8131,
9
  "eval_samples": 4000,
10
+ "eval_samples_per_second": 511.961,
11
+ "eval_steps_per_second": 63.995,
12
+ "train_loss": 0.06244980893376786,
13
+ "train_runtime": 304.7861,
14
  "train_samples": 55538,
15
+ "train_samples_per_second": 1822.196,
16
+ "train_steps_per_second": 14.239
17
  }
eval_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 5.0,
3
- "eval_accuracy": 0.9483607249989437,
4
- "eval_f1": 0.3637840032480715,
5
- "eval_loss": 0.18947799503803253,
6
- "eval_precision": 0.4573295222539812,
7
- "eval_recall": 0.30200889847647294,
8
- "eval_runtime": 7.5138,
9
  "eval_samples": 4000,
10
- "eval_samples_per_second": 532.352,
11
- "eval_steps_per_second": 66.544
12
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9566099116988466,
4
+ "eval_f1": 0.493515436703776,
5
+ "eval_loss": 0.1758583039045334,
6
+ "eval_precision": 0.5705440070765149,
7
+ "eval_recall": 0.43481191856545776,
8
+ "eval_runtime": 7.8131,
9
  "eval_samples": 4000,
10
+ "eval_samples_per_second": 511.961,
11
+ "eval_steps_per_second": 63.995
12
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 5.0,
3
- "train_loss": 0.2075410250694521,
4
- "train_runtime": 306.0993,
5
  "train_samples": 55538,
6
- "train_samples_per_second": 907.189,
7
- "train_steps_per_second": 7.089
8
  }
 
1
  {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.06244980893376786,
4
+ "train_runtime": 304.7861,
5
  "train_samples": 55538,
6
+ "train_samples_per_second": 1822.196,
7
+ "train_steps_per_second": 14.239
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 5.0,
5
  "eval_steps": 500,
6
- "global_step": 2170,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -315,21 +315,327 @@
315
  "step": 2150
316
  },
317
  {
318
- "epoch": 5.0,
319
- "step": 2170,
320
- "total_flos": 6755106697897248.0,
321
- "train_loss": 0.2075410250694521,
322
- "train_runtime": 306.0993,
323
- "train_samples_per_second": 907.189,
324
- "train_steps_per_second": 7.089
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
325
  }
326
  ],
327
  "logging_steps": 50,
328
- "max_steps": 2170,
329
  "num_input_tokens_seen": 0,
330
- "num_train_epochs": 5,
331
  "save_steps": 500,
332
- "total_flos": 6755106697897248.0,
333
  "train_batch_size": 128,
334
  "trial_name": null,
335
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
  "eval_steps": 500,
6
+ "global_step": 4340,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
315
  "step": 2150
316
  },
317
  {
318
+ "epoch": 5.07,
319
+ "learning_rate": 2.4654377880184332e-05,
320
+ "loss": 0.1501,
321
+ "step": 2200
322
+ },
323
+ {
324
+ "epoch": 5.18,
325
+ "learning_rate": 2.4078341013824887e-05,
326
+ "loss": 0.1533,
327
+ "step": 2250
328
+ },
329
+ {
330
+ "epoch": 5.3,
331
+ "learning_rate": 2.350230414746544e-05,
332
+ "loss": 0.1555,
333
+ "step": 2300
334
+ },
335
+ {
336
+ "epoch": 5.41,
337
+ "learning_rate": 2.2926267281105994e-05,
338
+ "loss": 0.1521,
339
+ "step": 2350
340
+ },
341
+ {
342
+ "epoch": 5.53,
343
+ "learning_rate": 2.2350230414746546e-05,
344
+ "loss": 0.1549,
345
+ "step": 2400
346
+ },
347
+ {
348
+ "epoch": 5.65,
349
+ "learning_rate": 2.1774193548387097e-05,
350
+ "loss": 0.1529,
351
+ "step": 2450
352
+ },
353
+ {
354
+ "epoch": 5.76,
355
+ "learning_rate": 2.1198156682027652e-05,
356
+ "loss": 0.1546,
357
+ "step": 2500
358
+ },
359
+ {
360
+ "epoch": 5.76,
361
+ "eval_accuracy": 0.9504309434281127,
362
+ "eval_f1": 0.38409736308316433,
363
+ "eval_loss": 0.1856723576784134,
364
+ "eval_precision": 0.48227383863080686,
365
+ "eval_recall": 0.31913172441688015,
366
+ "eval_runtime": 7.5018,
367
+ "eval_samples_per_second": 533.207,
368
+ "eval_steps_per_second": 66.651,
369
+ "step": 2500
370
+ },
371
+ {
372
+ "epoch": 5.88,
373
+ "learning_rate": 2.0622119815668204e-05,
374
+ "loss": 0.152,
375
+ "step": 2550
376
+ },
377
+ {
378
+ "epoch": 5.99,
379
+ "learning_rate": 2.0046082949308755e-05,
380
+ "loss": 0.1527,
381
+ "step": 2600
382
+ },
383
+ {
384
+ "epoch": 6.11,
385
+ "learning_rate": 1.9470046082949307e-05,
386
+ "loss": 0.1373,
387
+ "step": 2650
388
+ },
389
+ {
390
+ "epoch": 6.22,
391
+ "learning_rate": 1.8894009216589862e-05,
392
+ "loss": 0.1373,
393
+ "step": 2700
394
+ },
395
+ {
396
+ "epoch": 6.34,
397
+ "learning_rate": 1.8317972350230417e-05,
398
+ "loss": 0.1355,
399
+ "step": 2750
400
+ },
401
+ {
402
+ "epoch": 6.45,
403
+ "learning_rate": 1.774193548387097e-05,
404
+ "loss": 0.1367,
405
+ "step": 2800
406
+ },
407
+ {
408
+ "epoch": 6.57,
409
+ "learning_rate": 1.7165898617511524e-05,
410
+ "loss": 0.1344,
411
+ "step": 2850
412
+ },
413
+ {
414
+ "epoch": 6.68,
415
+ "learning_rate": 1.6589861751152075e-05,
416
+ "loss": 0.135,
417
+ "step": 2900
418
+ },
419
+ {
420
+ "epoch": 6.8,
421
+ "learning_rate": 1.6013824884792627e-05,
422
+ "loss": 0.1348,
423
+ "step": 2950
424
+ },
425
+ {
426
+ "epoch": 6.91,
427
+ "learning_rate": 1.543778801843318e-05,
428
+ "loss": 0.1343,
429
+ "step": 3000
430
+ },
431
+ {
432
+ "epoch": 6.91,
433
+ "eval_accuracy": 0.9534517723604715,
434
+ "eval_f1": 0.4423137378182962,
435
+ "eval_loss": 0.17842046916484833,
436
+ "eval_precision": 0.5302430751837196,
437
+ "eval_recall": 0.3793986787110692,
438
+ "eval_runtime": 7.7899,
439
+ "eval_samples_per_second": 513.483,
440
+ "eval_steps_per_second": 64.185,
441
+ "step": 3000
442
+ },
443
+ {
444
+ "epoch": 7.03,
445
+ "learning_rate": 1.4861751152073732e-05,
446
+ "loss": 0.1315,
447
+ "step": 3050
448
+ },
449
+ {
450
+ "epoch": 7.14,
451
+ "learning_rate": 1.4285714285714285e-05,
452
+ "loss": 0.1194,
453
+ "step": 3100
454
+ },
455
+ {
456
+ "epoch": 7.26,
457
+ "learning_rate": 1.3709677419354839e-05,
458
+ "loss": 0.1237,
459
+ "step": 3150
460
+ },
461
+ {
462
+ "epoch": 7.37,
463
+ "learning_rate": 1.313364055299539e-05,
464
+ "loss": 0.1183,
465
+ "step": 3200
466
+ },
467
+ {
468
+ "epoch": 7.49,
469
+ "learning_rate": 1.2557603686635947e-05,
470
+ "loss": 0.1231,
471
+ "step": 3250
472
+ },
473
+ {
474
+ "epoch": 7.6,
475
+ "learning_rate": 1.1981566820276497e-05,
476
+ "loss": 0.1209,
477
+ "step": 3300
478
+ },
479
+ {
480
+ "epoch": 7.72,
481
+ "learning_rate": 1.1405529953917052e-05,
482
+ "loss": 0.1209,
483
+ "step": 3350
484
+ },
485
+ {
486
+ "epoch": 7.83,
487
+ "learning_rate": 1.0829493087557604e-05,
488
+ "loss": 0.1213,
489
+ "step": 3400
490
+ },
491
+ {
492
+ "epoch": 7.95,
493
+ "learning_rate": 1.0253456221198157e-05,
494
+ "loss": 0.1218,
495
+ "step": 3450
496
+ },
497
+ {
498
+ "epoch": 8.06,
499
+ "learning_rate": 9.67741935483871e-06,
500
+ "loss": 0.1163,
501
+ "step": 3500
502
+ },
503
+ {
504
+ "epoch": 8.06,
505
+ "eval_accuracy": 0.9556170518399595,
506
+ "eval_f1": 0.47168932038834954,
507
+ "eval_loss": 0.17665258049964905,
508
+ "eval_precision": 0.55633931843166,
509
+ "eval_recall": 0.4093973304570581,
510
+ "eval_runtime": 7.4971,
511
+ "eval_samples_per_second": 533.536,
512
+ "eval_steps_per_second": 66.692,
513
+ "step": 3500
514
+ },
515
+ {
516
+ "epoch": 8.18,
517
+ "learning_rate": 9.101382488479262e-06,
518
+ "loss": 0.111,
519
+ "step": 3550
520
+ },
521
+ {
522
+ "epoch": 8.29,
523
+ "learning_rate": 8.525345622119817e-06,
524
+ "loss": 0.112,
525
+ "step": 3600
526
+ },
527
+ {
528
+ "epoch": 8.41,
529
+ "learning_rate": 7.949308755760369e-06,
530
+ "loss": 0.1106,
531
+ "step": 3650
532
+ },
533
+ {
534
+ "epoch": 8.53,
535
+ "learning_rate": 7.373271889400922e-06,
536
+ "loss": 0.1098,
537
+ "step": 3700
538
+ },
539
+ {
540
+ "epoch": 8.64,
541
+ "learning_rate": 6.7972350230414745e-06,
542
+ "loss": 0.1125,
543
+ "step": 3750
544
+ },
545
+ {
546
+ "epoch": 8.76,
547
+ "learning_rate": 6.221198156682028e-06,
548
+ "loss": 0.1099,
549
+ "step": 3800
550
+ },
551
+ {
552
+ "epoch": 8.87,
553
+ "learning_rate": 5.64516129032258e-06,
554
+ "loss": 0.1113,
555
+ "step": 3850
556
+ },
557
+ {
558
+ "epoch": 8.99,
559
+ "learning_rate": 5.0691244239631346e-06,
560
+ "loss": 0.1079,
561
+ "step": 3900
562
+ },
563
+ {
564
+ "epoch": 9.1,
565
+ "learning_rate": 4.493087557603687e-06,
566
+ "loss": 0.1048,
567
+ "step": 3950
568
+ },
569
+ {
570
+ "epoch": 9.22,
571
+ "learning_rate": 3.9170506912442395e-06,
572
+ "loss": 0.1045,
573
+ "step": 4000
574
+ },
575
+ {
576
+ "epoch": 9.22,
577
+ "eval_accuracy": 0.9554269297393215,
578
+ "eval_f1": 0.48804591584628837,
579
+ "eval_loss": 0.17828913033008575,
580
+ "eval_precision": 0.5594771241830065,
581
+ "eval_recall": 0.4327895375488742,
582
+ "eval_runtime": 7.85,
583
+ "eval_samples_per_second": 509.557,
584
+ "eval_steps_per_second": 63.695,
585
+ "step": 4000
586
+ },
587
+ {
588
+ "epoch": 9.33,
589
+ "learning_rate": 3.341013824884793e-06,
590
+ "loss": 0.1054,
591
+ "step": 4050
592
+ },
593
+ {
594
+ "epoch": 9.45,
595
+ "learning_rate": 2.7649769585253458e-06,
596
+ "loss": 0.1061,
597
+ "step": 4100
598
+ },
599
+ {
600
+ "epoch": 9.56,
601
+ "learning_rate": 2.1889400921658987e-06,
602
+ "loss": 0.1008,
603
+ "step": 4150
604
+ },
605
+ {
606
+ "epoch": 9.68,
607
+ "learning_rate": 1.6129032258064516e-06,
608
+ "loss": 0.1024,
609
+ "step": 4200
610
+ },
611
+ {
612
+ "epoch": 9.79,
613
+ "learning_rate": 1.0368663594470047e-06,
614
+ "loss": 0.1034,
615
+ "step": 4250
616
+ },
617
+ {
618
+ "epoch": 9.91,
619
+ "learning_rate": 4.6082949308755763e-07,
620
+ "loss": 0.1048,
621
+ "step": 4300
622
+ },
623
+ {
624
+ "epoch": 10.0,
625
+ "step": 4340,
626
+ "total_flos": 1.3513697075311152e+16,
627
+ "train_loss": 0.06244980893376786,
628
+ "train_runtime": 304.7861,
629
+ "train_samples_per_second": 1822.196,
630
+ "train_steps_per_second": 14.239
631
  }
632
  ],
633
  "logging_steps": 50,
634
+ "max_steps": 4340,
635
  "num_input_tokens_seen": 0,
636
+ "num_train_epochs": 10,
637
  "save_steps": 500,
638
+ "total_flos": 1.3513697075311152e+16,
639
  "train_batch_size": 128,
640
  "trial_name": null,
641
  "trial_params": null