akahana committed on
Commit
832ba1d
1 Parent(s): fea2522

End of training

Browse files
README.md CHANGED
@@ -1,9 +1,24 @@
1
  ---
2
  tags:
3
  - generated_from_trainer
 
 
 
 
4
  model-index:
5
  - name: mini-roberta-javanese
6
- results: []
 
 
 
 
 
 
 
 
 
 
 
7
  ---
8
 
9
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -11,7 +26,10 @@ should probably proofread and complete it, then remove this comment. -->
11
 
12
  # mini-roberta-javanese
13
 
14
- This model is a fine-tuned version of [](https://huggingface.co/) on an unknown dataset.
 
 
 
15
 
16
  ## Model description
17
 
 
1
  ---
2
  tags:
3
  - generated_from_trainer
4
+ datasets:
5
+ - akahana/GlotCC-V1-jav-Latn
6
+ metrics:
7
+ - accuracy
8
  model-index:
9
  - name: mini-roberta-javanese
10
+ results:
11
+ - task:
12
+ name: Masked Language Modeling
13
+ type: fill-mask
14
+ dataset:
15
+ name: akahana/GlotCC-V1-jav-Latn default
16
+ type: akahana/GlotCC-V1-jav-Latn
17
+ args: default
18
+ metrics:
19
+ - name: Accuracy
20
+ type: accuracy
21
+ value: 0.15905819453012543
22
  ---
23
 
24
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
26
 
27
  # mini-roberta-javanese
28
 
29
+ This model is a fine-tuned version of an unspecified base model (the base-model link was left empty by the Trainer) on the akahana/GlotCC-V1-jav-Latn default dataset.
30
+ It achieves the following results on the evaluation set:
31
+ - Loss: 6.0607
32
+ - Accuracy: 0.1591
33
 
34
  ## Model description
35
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.14698866640019598,
4
- "eval_loss": 6.374551773071289,
5
- "eval_runtime": 52.0555,
6
  "eval_samples": 8106,
7
- "eval_samples_per_second": 155.718,
8
- "eval_steps_per_second": 38.939,
9
- "perplexity": 586.7223872208433,
10
- "total_flos": 8866386447212160.0,
11
- "train_loss": 2.1665261722384206,
12
- "train_runtime": 1599.9695,
13
  "train_samples": 160441,
14
- "train_samples_per_second": 1504.163,
15
- "train_steps_per_second": 11.756
16
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.15905819453012543,
4
+ "eval_loss": 6.060703277587891,
5
+ "eval_runtime": 50.7232,
6
  "eval_samples": 8106,
7
+ "eval_samples_per_second": 159.809,
8
+ "eval_steps_per_second": 39.962,
9
+ "perplexity": 428.67680966518134,
10
+ "total_flos": 1.773277289442432e+16,
11
+ "train_loss": 3.0397992164007466,
12
+ "train_runtime": 4647.9256,
13
  "train_samples": 160441,
14
+ "train_samples_per_second": 1035.565,
15
+ "train_steps_per_second": 8.094
16
  }
eval_results.json CHANGED
@@ -1,10 +1,10 @@
1
  {
2
- "epoch": 15.0,
3
- "eval_accuracy": 0.14698866640019598,
4
- "eval_loss": 6.374551773071289,
5
- "eval_runtime": 52.0555,
6
  "eval_samples": 8106,
7
- "eval_samples_per_second": 155.718,
8
- "eval_steps_per_second": 38.939,
9
- "perplexity": 586.7223872208433
10
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "eval_accuracy": 0.15905819453012543,
4
+ "eval_loss": 6.060703277587891,
5
+ "eval_runtime": 50.7232,
6
  "eval_samples": 8106,
7
+ "eval_samples_per_second": 159.809,
8
+ "eval_steps_per_second": 39.962,
9
+ "perplexity": 428.67680966518134
10
  }
runs/Jul18_22-46-23_801ec3347245/events.out.tfevents.1721347554.801ec3347245.3797.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37dfeb6ab6ccc3320bff59200ee03313745fac3d9af531bee95b8616e9c43105
3
+ size 417
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 15.0,
3
- "total_flos": 8866386447212160.0,
4
- "train_loss": 2.1665261722384206,
5
- "train_runtime": 1599.9695,
6
  "train_samples": 160441,
7
- "train_samples_per_second": 1504.163,
8
- "train_steps_per_second": 11.756
9
  }
 
1
  {
2
+ "epoch": 30.0,
3
+ "total_flos": 1.773277289442432e+16,
4
+ "train_loss": 3.0397992164007466,
5
+ "train_runtime": 4647.9256,
6
  "train_samples": 160441,
7
+ "train_samples_per_second": 1035.565,
8
+ "train_steps_per_second": 8.094
9
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 15.0,
5
  "eval_steps": 500,
6
- "global_step": 18810,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -284,12 +284,287 @@
284
  "train_runtime": 1599.9695,
285
  "train_samples_per_second": 1504.163,
286
  "train_steps_per_second": 11.756
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  }
288
  ],
289
  "logging_steps": 500,
290
- "max_steps": 18810,
291
  "num_input_tokens_seen": 0,
292
- "num_train_epochs": 15,
293
  "save_steps": 500,
294
  "stateful_callbacks": {
295
  "TrainerControl": {
@@ -303,7 +578,7 @@
303
  "attributes": {}
304
  }
305
  },
306
- "total_flos": 8866386447212160.0,
307
  "train_batch_size": 128,
308
  "trial_name": null,
309
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 30.0,
5
  "eval_steps": 500,
6
+ "global_step": 37620,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
284
  "train_runtime": 1599.9695,
285
  "train_samples_per_second": 1504.163,
286
  "train_steps_per_second": 11.756
287
+ },
288
+ {
289
+ "epoch": 15.151515151515152,
290
+ "grad_norm": 2.2735719680786133,
291
+ "learning_rate": 4.974747474747475e-05,
292
+ "loss": 6.3524,
293
+ "step": 19000
294
+ },
295
+ {
296
+ "epoch": 15.55023923444976,
297
+ "grad_norm": 3.0628674030303955,
298
+ "learning_rate": 4.9082934609250406e-05,
299
+ "loss": 6.3525,
300
+ "step": 19500
301
+ },
302
+ {
303
+ "epoch": 15.94896331738437,
304
+ "grad_norm": 2.572157382965088,
305
+ "learning_rate": 4.841839447102605e-05,
306
+ "loss": 6.3283,
307
+ "step": 20000
308
+ },
309
+ {
310
+ "epoch": 16.34768740031898,
311
+ "grad_norm": 2.8556103706359863,
312
+ "learning_rate": 4.775385433280171e-05,
313
+ "loss": 6.3006,
314
+ "step": 20500
315
+ },
316
+ {
317
+ "epoch": 16.74641148325359,
318
+ "grad_norm": 2.7908451557159424,
319
+ "learning_rate": 4.7089314194577354e-05,
320
+ "loss": 6.2868,
321
+ "step": 21000
322
+ },
323
+ {
324
+ "epoch": 17.1451355661882,
325
+ "grad_norm": 2.637225389480591,
326
+ "learning_rate": 4.6424774056353006e-05,
327
+ "loss": 6.2645,
328
+ "step": 21500
329
+ },
330
+ {
331
+ "epoch": 17.54385964912281,
332
+ "grad_norm": 2.9248273372650146,
333
+ "learning_rate": 4.576023391812866e-05,
334
+ "loss": 6.24,
335
+ "step": 22000
336
+ },
337
+ {
338
+ "epoch": 17.942583732057415,
339
+ "grad_norm": 2.293976306915283,
340
+ "learning_rate": 4.509569377990431e-05,
341
+ "loss": 6.2331,
342
+ "step": 22500
343
+ },
344
+ {
345
+ "epoch": 18.341307814992025,
346
+ "grad_norm": 2.7412922382354736,
347
+ "learning_rate": 4.443115364167996e-05,
348
+ "loss": 6.1999,
349
+ "step": 23000
350
+ },
351
+ {
352
+ "epoch": 18.740031897926634,
353
+ "grad_norm": 3.194049119949341,
354
+ "learning_rate": 4.376661350345561e-05,
355
+ "loss": 6.1953,
356
+ "step": 23500
357
+ },
358
+ {
359
+ "epoch": 19.138755980861244,
360
+ "grad_norm": 2.366976022720337,
361
+ "learning_rate": 4.310207336523126e-05,
362
+ "loss": 6.1713,
363
+ "step": 24000
364
+ },
365
+ {
366
+ "epoch": 19.537480063795854,
367
+ "grad_norm": 2.459841728210449,
368
+ "learning_rate": 4.2437533227006915e-05,
369
+ "loss": 6.1535,
370
+ "step": 24500
371
+ },
372
+ {
373
+ "epoch": 19.93620414673046,
374
+ "grad_norm": 2.5043203830718994,
375
+ "learning_rate": 4.177432216905902e-05,
376
+ "loss": 6.1481,
377
+ "step": 25000
378
+ },
379
+ {
380
+ "epoch": 20.33492822966507,
381
+ "grad_norm": 2.6015625,
382
+ "learning_rate": 4.1109782030834664e-05,
383
+ "loss": 6.1228,
384
+ "step": 25500
385
+ },
386
+ {
387
+ "epoch": 20.73365231259968,
388
+ "grad_norm": 3.39577579498291,
389
+ "learning_rate": 4.0445241892610315e-05,
390
+ "loss": 6.1113,
391
+ "step": 26000
392
+ },
393
+ {
394
+ "epoch": 21.13237639553429,
395
+ "grad_norm": 2.7234418392181396,
396
+ "learning_rate": 3.978070175438597e-05,
397
+ "loss": 6.1074,
398
+ "step": 26500
399
+ },
400
+ {
401
+ "epoch": 21.5311004784689,
402
+ "grad_norm": 2.6176741123199463,
403
+ "learning_rate": 3.911749069643807e-05,
404
+ "loss": 6.0859,
405
+ "step": 27000
406
+ },
407
+ {
408
+ "epoch": 21.92982456140351,
409
+ "grad_norm": 2.5121538639068604,
410
+ "learning_rate": 3.845427963849017e-05,
411
+ "loss": 6.0804,
412
+ "step": 27500
413
+ },
414
+ {
415
+ "epoch": 22.328548644338117,
416
+ "grad_norm": 2.72497296333313,
417
+ "learning_rate": 3.778973950026582e-05,
418
+ "loss": 6.0623,
419
+ "step": 28000
420
+ },
421
+ {
422
+ "epoch": 22.727272727272727,
423
+ "grad_norm": 2.9392683506011963,
424
+ "learning_rate": 3.712519936204147e-05,
425
+ "loss": 6.0536,
426
+ "step": 28500
427
+ },
428
+ {
429
+ "epoch": 23.125996810207337,
430
+ "grad_norm": 2.5510990619659424,
431
+ "learning_rate": 3.646065922381712e-05,
432
+ "loss": 6.0508,
433
+ "step": 29000
434
+ },
435
+ {
436
+ "epoch": 23.524720893141946,
437
+ "grad_norm": 2.9767627716064453,
438
+ "learning_rate": 3.5796119085592774e-05,
439
+ "loss": 6.029,
440
+ "step": 29500
441
+ },
442
+ {
443
+ "epoch": 23.923444976076556,
444
+ "grad_norm": 2.644033908843994,
445
+ "learning_rate": 3.513157894736842e-05,
446
+ "loss": 6.0329,
447
+ "step": 30000
448
+ },
449
+ {
450
+ "epoch": 24.322169059011163,
451
+ "grad_norm": 2.623633861541748,
452
+ "learning_rate": 3.446836788942052e-05,
453
+ "loss": 6.003,
454
+ "step": 30500
455
+ },
456
+ {
457
+ "epoch": 24.720893141945773,
458
+ "grad_norm": 2.6472909450531006,
459
+ "learning_rate": 3.3803827751196174e-05,
460
+ "loss": 6.0,
461
+ "step": 31000
462
+ },
463
+ {
464
+ "epoch": 25.119617224880383,
465
+ "grad_norm": 3.0670645236968994,
466
+ "learning_rate": 3.3139287612971825e-05,
467
+ "loss": 5.9948,
468
+ "step": 31500
469
+ },
470
+ {
471
+ "epoch": 25.518341307814993,
472
+ "grad_norm": 2.6866748332977295,
473
+ "learning_rate": 3.247474747474748e-05,
474
+ "loss": 5.9883,
475
+ "step": 32000
476
+ },
477
+ {
478
+ "epoch": 25.917065390749602,
479
+ "grad_norm": 2.8670027256011963,
480
+ "learning_rate": 3.181020733652313e-05,
481
+ "loss": 5.9801,
482
+ "step": 32500
483
+ },
484
+ {
485
+ "epoch": 26.31578947368421,
486
+ "grad_norm": 2.797853708267212,
487
+ "learning_rate": 3.114566719829878e-05,
488
+ "loss": 5.9665,
489
+ "step": 33000
490
+ },
491
+ {
492
+ "epoch": 26.71451355661882,
493
+ "grad_norm": 3.076382637023926,
494
+ "learning_rate": 3.0481127060074432e-05,
495
+ "loss": 5.9655,
496
+ "step": 33500
497
+ },
498
+ {
499
+ "epoch": 27.11323763955343,
500
+ "grad_norm": 2.7438435554504395,
501
+ "learning_rate": 2.981791600212653e-05,
502
+ "loss": 5.9445,
503
+ "step": 34000
504
+ },
505
+ {
506
+ "epoch": 27.51196172248804,
507
+ "grad_norm": 3.119704484939575,
508
+ "learning_rate": 2.9153375863902184e-05,
509
+ "loss": 5.9319,
510
+ "step": 34500
511
+ },
512
+ {
513
+ "epoch": 27.91068580542265,
514
+ "grad_norm": 2.594749927520752,
515
+ "learning_rate": 2.8488835725677832e-05,
516
+ "loss": 5.9327,
517
+ "step": 35000
518
+ },
519
+ {
520
+ "epoch": 28.30940988835726,
521
+ "grad_norm": 2.6697604656219482,
522
+ "learning_rate": 2.7824295587453487e-05,
523
+ "loss": 5.9261,
524
+ "step": 35500
525
+ },
526
+ {
527
+ "epoch": 28.708133971291865,
528
+ "grad_norm": 3.4312615394592285,
529
+ "learning_rate": 2.7159755449229135e-05,
530
+ "loss": 5.9146,
531
+ "step": 36000
532
+ },
533
+ {
534
+ "epoch": 29.106858054226475,
535
+ "grad_norm": 3.0306286811828613,
536
+ "learning_rate": 2.6495215311004783e-05,
537
+ "loss": 5.9243,
538
+ "step": 36500
539
+ },
540
+ {
541
+ "epoch": 29.505582137161085,
542
+ "grad_norm": 2.841744899749756,
543
+ "learning_rate": 2.583067517278044e-05,
544
+ "loss": 5.9046,
545
+ "step": 37000
546
+ },
547
+ {
548
+ "epoch": 29.904306220095695,
549
+ "grad_norm": 2.9730348587036133,
550
+ "learning_rate": 2.5166135034556087e-05,
551
+ "loss": 5.8976,
552
+ "step": 37500
553
+ },
554
+ {
555
+ "epoch": 30.0,
556
+ "step": 37620,
557
+ "total_flos": 1.773277289442432e+16,
558
+ "train_loss": 3.0397992164007466,
559
+ "train_runtime": 4647.9256,
560
+ "train_samples_per_second": 1035.565,
561
+ "train_steps_per_second": 8.094
562
  }
563
  ],
564
  "logging_steps": 500,
565
+ "max_steps": 37620,
566
  "num_input_tokens_seen": 0,
567
+ "num_train_epochs": 30,
568
  "save_steps": 500,
569
  "stateful_callbacks": {
570
  "TrainerControl": {
 
578
  "attributes": {}
579
  }
580
  },
581
+ "total_flos": 1.773277289442432e+16,
582
  "train_batch_size": 128,
583
  "trial_name": null,
584
  "trial_params": null