akahana committed
Commit 3f238c0
1 Parent(s): de6d598

End of training

README.md CHANGED
@@ -1,9 +1,24 @@
 ---
 tags:
 - generated_from_trainer
+datasets:
+- akahana/GlotCC-V1-jav-Latn
+metrics:
+- accuracy
 model-index:
 - name: roberta-javanese
-  results: []
+  results:
+  - task:
+      name: Masked Language Modeling
+      type: fill-mask
+    dataset:
+      name: akahana/GlotCC-V1-jav-Latn default
+      type: akahana/GlotCC-V1-jav-Latn
+      args: default
+    metrics:
+    - name: Accuracy
+      type: accuracy
+      value: 0.16528946828166702
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
 
 # roberta-javanese
 
-This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
+This model is a fine-tuned version of [](https://huggingface.co/) on the akahana/GlotCC-V1-jav-Latn default dataset.
+It achieves the following results on the evaluation set:
+- Loss: 6.0064
+- Accuracy: 0.1653
 
 ## Model description
 
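The updated card advertises a fill-mask result on akahana/GlotCC-V1-jav-Latn (accuracy 0.1653, eval loss 6.0064). A minimal sketch of querying the model with the transformers fill-mask pipeline; the hub id akahana/roberta-javanese and the Javanese example sentence are assumptions, not part of this commit:

from transformers import pipeline

# Hub id assumed from the author/model name in this commit; adjust if it lives elsewhere.
fill_mask = pipeline("fill-mask", model="akahana/roberta-javanese")

# RoBERTa-style models use "<mask>" as the mask token.
for pred in fill_mask("Aku arep <mask> menyang pasar."):
    print(f"{pred['token_str']!r}  score={pred['score']:.4f}")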
all_results.json CHANGED
@@ -1,16 +1,16 @@
 {
-    "epoch": 5.0,
-    "eval_accuracy": 0.14735192890532697,
-    "eval_loss": 6.437006950378418,
-    "eval_runtime": 32.1887,
+    "epoch": 8.0,
+    "eval_accuracy": 0.16528946828166702,
+    "eval_loss": 6.006351470947266,
+    "eval_runtime": 28.9332,
     "eval_samples": 4053,
-    "eval_samples_per_second": 125.914,
-    "eval_steps_per_second": 31.502,
-    "perplexity": 624.5347361636025,
-    "total_flos": 2.639861525017728e+16,
-    "train_loss": 0.0,
-    "train_runtime": 0.0088,
+    "eval_samples_per_second": 140.081,
+    "eval_steps_per_second": 35.046,
+    "perplexity": 405.99931440686527,
+    "total_flos": 4.223778440028365e+16,
+    "train_loss": 2.356345704317949,
+    "train_runtime": 3822.1574,
     "train_samples": 80219,
-    "train_samples_per_second": 45629508.88,
-    "train_steps_per_second": 2852022.059
+    "train_samples_per_second": 167.903,
+    "train_steps_per_second": 10.495
 }
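The derived fields in the new all_results.json are internally consistent: perplexity is exp(eval_loss) and eval_samples_per_second is eval_samples / eval_runtime. A quick check in plain Python, using the values above:

import math

eval_loss = 6.006351470947266
eval_samples, eval_runtime = 4053, 28.9332

print(math.exp(eval_loss))          # ~405.999, the reported perplexity
print(eval_samples / eval_runtime)  # ~140.08, the reported eval_samples_per_second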
eval_results.json CHANGED
@@ -1,10 +1,10 @@
 {
-    "epoch": 5.0,
-    "eval_accuracy": 0.14735192890532697,
-    "eval_loss": 6.437006950378418,
-    "eval_runtime": 32.1887,
+    "epoch": 8.0,
+    "eval_accuracy": 0.16528946828166702,
+    "eval_loss": 6.006351470947266,
+    "eval_runtime": 28.9332,
     "eval_samples": 4053,
-    "eval_samples_per_second": 125.914,
-    "eval_steps_per_second": 31.502,
-    "perplexity": 624.5347361636025
+    "eval_samples_per_second": 140.081,
+    "eval_steps_per_second": 35.046,
+    "perplexity": 405.99931440686527
 }
runs/Jul12_22-15-22_6b2d4ff0fae4/events.out.tfevents.1720826418.6b2d4ff0fae4.2315.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:2115f7e967f854d07cf94877bdb52cbf1ec524233de60bab2ea57c6dde3d51a8
+size 417
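This TensorBoard event file is tracked with Git LFS, so the diff shows only the pointer (oid and size), not the logged scalars. A sketch for inspecting them after pulling the run directory locally; the tag name "train/loss" is an assumption about how the Trainer's TensorBoard callback labels its scalars, so list the tags first:

from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

# Point at the run directory that contains the events.out.tfevents.* file.
acc = EventAccumulator("runs/Jul12_22-15-22_6b2d4ff0fae4")
acc.Reload()

print(acc.Tags()["scalars"])             # scalar tags actually present in the log
for event in acc.Scalars("train/loss"):  # assumed tag; pick one from the list above
    print(event.step, event.value)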
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
-    "epoch": 5.0,
-    "total_flos": 2.639861525017728e+16,
-    "train_loss": 0.0,
-    "train_runtime": 0.0088,
+    "epoch": 8.0,
+    "total_flos": 4.223778440028365e+16,
+    "train_loss": 2.356345704317949,
+    "train_runtime": 3822.1574,
     "train_samples": 80219,
-    "train_samples_per_second": 45629508.88,
-    "train_steps_per_second": 2852022.059
+    "train_samples_per_second": 167.903,
+    "train_steps_per_second": 10.495
 }
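The new throughput figures follow from the run size: 80,219 training samples for 8 epochs in roughly 3,822 seconds, over 40,112 optimizer steps. A quick check:

train_samples, num_epochs = 80219, 8
train_runtime = 3822.1574
global_step = 40112

print(train_samples * num_epochs / train_runtime)  # ~167.9, the reported train_samples_per_second
print(global_step / train_runtime)                 # ~10.49, the reported train_steps_per_second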
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 5.0,
+  "epoch": 8.0,
   "eval_steps": 500,
-  "global_step": 25070,
+  "global_step": 40112,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -368,19 +368,229 @@
       "train_steps_per_second": 3.857
     },
     {
-      "epoch": 5.0,
-      "step": 25070,
-      "total_flos": 2.639861525017728e+16,
-      "train_loss": 0.0,
-      "train_runtime": 0.0088,
-      "train_samples_per_second": 45629508.88,
-      "train_steps_per_second": 2852022.059
+      "epoch": 5.085759872357399,
+      "grad_norm": 3.5418105125427246,
+      "learning_rate": 4.946400079776626e-05,
+      "loss": 6.5458,
+      "step": 25500
+    },
+    {
+      "epoch": 5.1854806541683285,
+      "grad_norm": 4.323005676269531,
+      "learning_rate": 4.884074591144795e-05,
+      "loss": 6.5604,
+      "step": 26000
+    },
+    {
+      "epoch": 5.285201435979258,
+      "grad_norm": 4.445618629455566,
+      "learning_rate": 4.8217491025129644e-05,
+      "loss": 6.5452,
+      "step": 26500
+    },
+    {
+      "epoch": 5.384922217790187,
+      "grad_norm": 4.320890426635742,
+      "learning_rate": 4.759423613881133e-05,
+      "loss": 6.5239,
+      "step": 27000
+    },
+    {
+      "epoch": 5.484642999601117,
+      "grad_norm": 3.8980209827423096,
+      "learning_rate": 4.697098125249302e-05,
+      "loss": 6.5278,
+      "step": 27500
+    },
+    {
+      "epoch": 5.584363781412046,
+      "grad_norm": 4.074916362762451,
+      "learning_rate": 4.6347726366174716e-05,
+      "loss": 6.5044,
+      "step": 28000
+    },
+    {
+      "epoch": 5.684084563222975,
+      "grad_norm": 4.465285778045654,
+      "learning_rate": 4.572447147985641e-05,
+      "loss": 6.472,
+      "step": 28500
+    },
+    {
+      "epoch": 5.783805345033905,
+      "grad_norm": 4.351347923278809,
+      "learning_rate": 4.5101216593538095e-05,
+      "loss": 6.4504,
+      "step": 29000
+    },
+    {
+      "epoch": 5.883526126844835,
+      "grad_norm": 4.14565372467041,
+      "learning_rate": 4.447796170721978e-05,
+      "loss": 6.4375,
+      "step": 29500
+    },
+    {
+      "epoch": 5.983246908655763,
+      "grad_norm": 4.669959545135498,
+      "learning_rate": 4.3854706820901474e-05,
+      "loss": 6.4393,
+      "step": 30000
+    },
+    {
+      "epoch": 6.082967690466694,
+      "grad_norm": 4.345717430114746,
+      "learning_rate": 4.323145193458317e-05,
+      "loss": 6.3808,
+      "step": 30500
+    },
+    {
+      "epoch": 6.182688472277623,
+      "grad_norm": 4.040054798126221,
+      "learning_rate": 4.260819704826486e-05,
+      "loss": 6.3705,
+      "step": 31000
+    },
+    {
+      "epoch": 6.282409254088552,
+      "grad_norm": 4.663171291351318,
+      "learning_rate": 4.198618867171919e-05,
+      "loss": 6.3803,
+      "step": 31500
+    },
+    {
+      "epoch": 6.382130035899482,
+      "grad_norm": 4.45890474319458,
+      "learning_rate": 4.136293378540088e-05,
+      "loss": 6.3256,
+      "step": 32000
+    },
+    {
+      "epoch": 6.481850817710411,
+      "grad_norm": 4.158110618591309,
+      "learning_rate": 4.073967889908257e-05,
+      "loss": 6.3351,
+      "step": 32500
+    },
+    {
+      "epoch": 6.58157159952134,
+      "grad_norm": 4.460795879364014,
+      "learning_rate": 4.0116424012764265e-05,
+      "loss": 6.3137,
+      "step": 33000
+    },
+    {
+      "epoch": 6.68129238133227,
+      "grad_norm": 4.767895221710205,
+      "learning_rate": 3.949316912644596e-05,
+      "loss": 6.2751,
+      "step": 33500
+    },
+    {
+      "epoch": 6.781013163143199,
+      "grad_norm": 4.399994850158691,
+      "learning_rate": 3.887116074990028e-05,
+      "loss": 6.2345,
+      "step": 34000
+    },
+    {
+      "epoch": 6.8807339449541285,
+      "grad_norm": 4.522914886474609,
+      "learning_rate": 3.8247905863581976e-05,
+      "loss": 6.218,
+      "step": 34500
+    },
+    {
+      "epoch": 6.980454726765058,
+      "grad_norm": 4.697731018066406,
+      "learning_rate": 3.762465097726366e-05,
+      "loss": 6.1819,
+      "step": 35000
+    },
+    {
+      "epoch": 7.080175508575987,
+      "grad_norm": 5.113608360290527,
+      "learning_rate": 3.7001396090945355e-05,
+      "loss": 6.1566,
+      "step": 35500
+    },
+    {
+      "epoch": 7.179896290386917,
+      "grad_norm": 4.987142086029053,
+      "learning_rate": 3.637814120462705e-05,
+      "loss": 6.1504,
+      "step": 36000
+    },
+    {
+      "epoch": 7.279617072197846,
+      "grad_norm": 4.797494888305664,
+      "learning_rate": 3.5756132828081373e-05,
+      "loss": 6.0915,
+      "step": 36500
+    },
+    {
+      "epoch": 7.379337854008775,
+      "grad_norm": 5.114543437957764,
+      "learning_rate": 3.5132877941763066e-05,
+      "loss": 6.0859,
+      "step": 37000
+    },
+    {
+      "epoch": 7.479058635819705,
+      "grad_norm": 5.5212721824646,
+      "learning_rate": 3.450962305544476e-05,
+      "loss": 6.0643,
+      "step": 37500
+    },
+    {
+      "epoch": 7.578779417630634,
+      "grad_norm": 4.77981424331665,
+      "learning_rate": 3.3886368169126446e-05,
+      "loss": 6.038,
+      "step": 38000
+    },
+    {
+      "epoch": 7.678500199441563,
+      "grad_norm": 5.6912760734558105,
+      "learning_rate": 3.326311328280814e-05,
+      "loss": 6.0327,
+      "step": 38500
+    },
+    {
+      "epoch": 7.778220981252493,
+      "grad_norm": 5.021594524383545,
+      "learning_rate": 3.2641104906262464e-05,
+      "loss": 6.0089,
+      "step": 39000
+    },
+    {
+      "epoch": 7.877941763063422,
+      "grad_norm": 4.9512410163879395,
+      "learning_rate": 3.201785001994416e-05,
+      "loss": 5.9914,
+      "step": 39500
+    },
+    {
+      "epoch": 7.9776625448743514,
+      "grad_norm": 4.6659088134765625,
+      "learning_rate": 3.139459513362585e-05,
+      "loss": 5.9688,
+      "step": 40000
+    },
+    {
+      "epoch": 8.0,
+      "step": 40112,
+      "total_flos": 4.223778440028365e+16,
+      "train_loss": 2.356345704317949,
+      "train_runtime": 3822.1574,
+      "train_samples_per_second": 167.903,
+      "train_steps_per_second": 10.495
     }
   ],
   "logging_steps": 500,
-  "max_steps": 25070,
+  "max_steps": 40112,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
+  "num_train_epochs": 8,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -394,7 +604,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 2.639861525017728e+16,
+  "total_flos": 4.223778440028365e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null