akahana committed on
Commit
a0ec623
·
verified ·
1 Parent(s): 6addd0c

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: roberta-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # roberta-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: roberta-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.2780392959476054
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # roberta-javanese
28
 
29
+ This model is a fine-tuned version of an unspecified base model on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 5.0243
32
+ - Accuracy: 0.2780
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.16528946828166702,
4
- "eval_loss": 6.006351470947266,
5
- "eval_runtime": 28.9332,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 140.081,
8
- "eval_steps_per_second": 35.046,
9
- "perplexity": 405.99931440686527,
10
- "total_flos": 4.223778440028365e+16,
11
- "train_loss": 2.356345704317949,
12
- "train_runtime": 3822.1574,
13
  "train_samples": 80219,
14
- "train_samples_per_second": 167.903,
15
- "train_steps_per_second": 10.495
16
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "eval_accuracy": 0.2780392959476054,
4
+ "eval_loss": 5.024305820465088,
5
+ "eval_runtime": 29.6763,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 136.574,
8
+ "eval_steps_per_second": 34.169,
9
+ "perplexity": 152.0646592908706,
10
+ "total_flos": 5.807695355039002e+16,
11
+ "train_loss": 1.5156397336923944,
12
+ "train_runtime": 4860.501,
13
  "train_samples": 80219,
14
+ "train_samples_per_second": 181.547,
15
+ "train_steps_per_second": 11.347
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 8.0,
3
- "eval_accuracy": 0.16528946828166702,
4
- "eval_loss": 6.006351470947266,
5
- "eval_runtime": 28.9332,
6
  "eval_samples": 4053,
7
- "eval_samples_per_second": 140.081,
8
- "eval_steps_per_second": 35.046,
9
- "perplexity": 405.99931440686527
10
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "eval_accuracy": 0.2780392959476054,
4
+ "eval_loss": 5.024305820465088,
5
+ "eval_runtime": 29.6763,
6
  "eval_samples": 4053,
7
+ "eval_samples_per_second": 136.574,
8
+ "eval_steps_per_second": 34.169,
9
+ "perplexity": 152.0646592908706
10
  }
runs/Jul12_23-21-44_6b2d4ff0fae4/events.out.tfevents.1720831481.6b2d4ff0fae4.19196.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44b6f3370c90f02699f03b53af768b8af403f3efab6f1ce5ae19e325c7edf30f
3
+ size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 8.0,
3
- "total_flos": 4.223778440028365e+16,
4
- "train_loss": 2.356345704317949,
5
- "train_runtime": 3822.1574,
6
  "train_samples": 80219,
7
- "train_samples_per_second": 167.903,
8
- "train_steps_per_second": 10.495
9
  }
 
1
  {
2
+ "epoch": 11.0,
3
+ "total_flos": 5.807695355039002e+16,
4
+ "train_loss": 1.5156397336923944,
5
+ "train_runtime": 4860.501,
6
  "train_samples": 80219,
7
+ "train_samples_per_second": 181.547,
8
+ "train_steps_per_second": 11.347
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.0,
5
  "eval_steps": 500,
6
- "global_step": 40112,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -578,19 +578,229 @@
578
  "step": 40000
579
  },
580
  {
581
- "epoch": 8.0,
582
- "step": 40112,
583
- "total_flos": 4.223778440028365e+16,
584
- "train_loss": 2.356345704317949,
585
- "train_runtime": 3822.1574,
586
- "train_samples_per_second": 167.903,
587
- "train_steps_per_second": 10.495
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
588
  }
589
  ],
590
  "logging_steps": 500,
591
- "max_steps": 40112,
592
  "num_input_tokens_seen": 0,
593
- "num_train_epochs": 8,
594
  "save_steps": 500,
595
  "stateful_callbacks": {
596
  "TrainerControl": {
@@ -604,7 +814,7 @@
604
  "attributes": {}
605
  }
606
  },
607
- "total_flos": 4.223778440028365e+16,
608
  "train_batch_size": 16,
609
  "trial_name": null,
610
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 11.0,
5
  "eval_steps": 500,
6
+ "global_step": 55154,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
578
  "step": 40000
579
  },
580
  {
581
+ "epoch": 8.07738332668528,
582
+ "grad_norm": 5.084179401397705,
583
+ "learning_rate": 3.601552017986003e-05,
584
+ "loss": 5.9368,
585
+ "step": 40500
586
+ },
587
+ {
588
+ "epoch": 8.177104108496211,
589
+ "grad_norm": 5.475657939910889,
590
+ "learning_rate": 3.556224389890126e-05,
591
+ "loss": 5.9181,
592
+ "step": 41000
593
+ },
594
+ {
595
+ "epoch": 8.27682489030714,
596
+ "grad_norm": 4.678411960601807,
597
+ "learning_rate": 3.510896761794249e-05,
598
+ "loss": 5.8795,
599
+ "step": 41500
600
+ },
601
+ {
602
+ "epoch": 8.37654567211807,
603
+ "grad_norm": 5.502169132232666,
604
+ "learning_rate": 3.465569133698372e-05,
605
+ "loss": 5.8389,
606
+ "step": 42000
607
+ },
608
+ {
609
+ "epoch": 8.476266453928998,
610
+ "grad_norm": 5.32131290435791,
611
+ "learning_rate": 3.420241505602495e-05,
612
+ "loss": 5.8329,
613
+ "step": 42500
614
+ },
615
+ {
616
+ "epoch": 8.575987235739928,
617
+ "grad_norm": 5.6808552742004395,
618
+ "learning_rate": 3.374913877506618e-05,
619
+ "loss": 5.8001,
620
+ "step": 43000
621
+ },
622
+ {
623
+ "epoch": 8.675708017550857,
624
+ "grad_norm": 4.988351821899414,
625
+ "learning_rate": 3.329586249410741e-05,
626
+ "loss": 5.7928,
627
+ "step": 43500
628
+ },
629
+ {
630
+ "epoch": 8.775428799361787,
631
+ "grad_norm": 5.559896469116211,
632
+ "learning_rate": 3.284258621314864e-05,
633
+ "loss": 5.7488,
634
+ "step": 44000
635
+ },
636
+ {
637
+ "epoch": 8.875149581172716,
638
+ "grad_norm": 6.084516525268555,
639
+ "learning_rate": 3.238930993218987e-05,
640
+ "loss": 5.7262,
641
+ "step": 44500
642
+ },
643
+ {
644
+ "epoch": 8.974870362983646,
645
+ "grad_norm": 6.219081401824951,
646
+ "learning_rate": 3.19360336512311e-05,
647
+ "loss": 5.6925,
648
+ "step": 45000
649
+ },
650
+ {
651
+ "epoch": 9.074591144794574,
652
+ "grad_norm": 6.170139789581299,
653
+ "learning_rate": 3.1482757370272333e-05,
654
+ "loss": 5.6491,
655
+ "step": 45500
656
+ },
657
+ {
658
+ "epoch": 9.174311926605505,
659
+ "grad_norm": 5.830073356628418,
660
+ "learning_rate": 3.102948108931356e-05,
661
+ "loss": 5.6228,
662
+ "step": 46000
663
+ },
664
+ {
665
+ "epoch": 9.274032708416435,
666
+ "grad_norm": 5.452333927154541,
667
+ "learning_rate": 3.0577111360916706e-05,
668
+ "loss": 5.5724,
669
+ "step": 46500
670
+ },
671
+ {
672
+ "epoch": 9.373753490227363,
673
+ "grad_norm": 5.113864421844482,
674
+ "learning_rate": 3.0123835079957935e-05,
675
+ "loss": 5.5437,
676
+ "step": 47000
677
+ },
678
+ {
679
+ "epoch": 9.473474272038294,
680
+ "grad_norm": 5.875530242919922,
681
+ "learning_rate": 2.9670558798999164e-05,
682
+ "loss": 5.525,
683
+ "step": 47500
684
+ },
685
+ {
686
+ "epoch": 9.573195053849222,
687
+ "grad_norm": 5.342255592346191,
688
+ "learning_rate": 2.9217282518040397e-05,
689
+ "loss": 5.5145,
690
+ "step": 48000
691
+ },
692
+ {
693
+ "epoch": 9.672915835660152,
694
+ "grad_norm": 6.1103644371032715,
695
+ "learning_rate": 2.8764006237081626e-05,
696
+ "loss": 5.4687,
697
+ "step": 48500
698
+ },
699
+ {
700
+ "epoch": 9.77263661747108,
701
+ "grad_norm": 6.640170097351074,
702
+ "learning_rate": 2.8310729956122855e-05,
703
+ "loss": 5.4448,
704
+ "step": 49000
705
+ },
706
+ {
707
+ "epoch": 9.872357399282011,
708
+ "grad_norm": 6.135842323303223,
709
+ "learning_rate": 2.7858360227726005e-05,
710
+ "loss": 5.4075,
711
+ "step": 49500
712
+ },
713
+ {
714
+ "epoch": 9.97207818109294,
715
+ "grad_norm": 6.063602924346924,
716
+ "learning_rate": 2.7405083946767234e-05,
717
+ "loss": 5.374,
718
+ "step": 50000
719
+ },
720
+ {
721
+ "epoch": 10.07179896290387,
722
+ "grad_norm": 6.689053535461426,
723
+ "learning_rate": 2.6951807665808463e-05,
724
+ "loss": 5.3459,
725
+ "step": 50500
726
+ },
727
+ {
728
+ "epoch": 10.171519744714798,
729
+ "grad_norm": 6.488341331481934,
730
+ "learning_rate": 2.6498531384849696e-05,
731
+ "loss": 5.3185,
732
+ "step": 51000
733
+ },
734
+ {
735
+ "epoch": 10.271240526525728,
736
+ "grad_norm": 6.589330673217773,
737
+ "learning_rate": 2.6045255103890925e-05,
738
+ "loss": 5.3019,
739
+ "step": 51500
740
+ },
741
+ {
742
+ "epoch": 10.370961308336657,
743
+ "grad_norm": 6.61977481842041,
744
+ "learning_rate": 2.5592885375494075e-05,
745
+ "loss": 5.2792,
746
+ "step": 52000
747
+ },
748
+ {
749
+ "epoch": 10.470682090147587,
750
+ "grad_norm": 6.396610736846924,
751
+ "learning_rate": 2.5139609094535304e-05,
752
+ "loss": 5.2347,
753
+ "step": 52500
754
+ },
755
+ {
756
+ "epoch": 10.570402871958516,
757
+ "grad_norm": 7.000791549682617,
758
+ "learning_rate": 2.4686332813576534e-05,
759
+ "loss": 5.2252,
760
+ "step": 53000
761
+ },
762
+ {
763
+ "epoch": 10.670123653769446,
764
+ "grad_norm": 6.714987277984619,
765
+ "learning_rate": 2.4233056532617763e-05,
766
+ "loss": 5.1965,
767
+ "step": 53500
768
+ },
769
+ {
770
+ "epoch": 10.769844435580374,
771
+ "grad_norm": 7.012180805206299,
772
+ "learning_rate": 2.3779780251658992e-05,
773
+ "loss": 5.1769,
774
+ "step": 54000
775
+ },
776
+ {
777
+ "epoch": 10.869565217391305,
778
+ "grad_norm": 6.85835599899292,
779
+ "learning_rate": 2.332650397070022e-05,
780
+ "loss": 5.1442,
781
+ "step": 54500
782
+ },
783
+ {
784
+ "epoch": 10.969285999202233,
785
+ "grad_norm": 6.789878845214844,
786
+ "learning_rate": 2.2873227689741453e-05,
787
+ "loss": 5.1071,
788
+ "step": 55000
789
+ },
790
+ {
791
+ "epoch": 11.0,
792
+ "step": 55154,
793
+ "total_flos": 5.807695355039002e+16,
794
+ "train_loss": 1.5156397336923944,
795
+ "train_runtime": 4860.501,
796
+ "train_samples_per_second": 181.547,
797
+ "train_steps_per_second": 11.347
798
  }
799
  ],
800
  "logging_steps": 500,
801
+ "max_steps": 55154,
802
  "num_input_tokens_seen": 0,
803
+ "num_train_epochs": 11,
804
  "save_steps": 500,
805
  "stateful_callbacks": {
806
  "TrainerControl": {
 
814
  "attributes": {}
815
  }
816
  },
817
+ "total_flos": 5.807695355039002e+16,
818
  "train_batch_size": 16,
819
  "trial_name": null,
820
  "trial_params": null