AlekseyKorshuk commited on
Commit
d624642
1 Parent(s): d9cb9cb

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/scriptonite")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3iyoxg0v/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Скриптонит (Scriptonite)'s lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/di1clo79) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/di1clo79/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/scriptonite")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/13pxeww0/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Скриптонит (Scriptonite)'s lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1itfp830) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1itfp830/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
1
  {
2
+ "_name_or_path": "huggingartists/scriptonite",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.727362871170044, "eval_runtime": 20.7773, "eval_samples_per_second": 21.369, "eval_steps_per_second": 2.695, "epoch": 1.0}
1
+ {"eval_loss": 1.5933494567871094, "eval_runtime": 22.2071, "eval_samples_per_second": 20.939, "eval_steps_per_second": 2.657, "epoch": 2.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b5c46ac5fd3298d2a01f9b7915560bc450f3997bc18b7dada5a182b6fbdeffa
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c11bfa3837c0edca55ca9f7ccdb4f3d86d19b9ba23fe6cf05c899732f878fa76
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8246c465fe86028eab455a564decb982d3c8b4cc33c5a236fa65250065f2dfa
3
  size 995604017
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde59a917e4907642c41b1ba1a0796fe83190743077f4abc55b70f6d855f3a67
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4642ea9beb4bc34749ebce90968edfe32be19e17efd0ba3eb969a39d1b3c9fd3
3
  size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:218d35bc6715a739b952792ffa8615837479989eea7021decc404a768b2fed37
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:361696374a33a1ba4cfa105bbdd0232fc91f20b020d2290b0ed59f96e83efc13
3
- size 14439
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34f8219694a6e795ffa0539af4f5ebbbfd0956e15ebc61431bdc1cbffbd00148
3
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5643af0d2d309ea9f8d4e15ffbc8ebd3faca59b9cbbd8bf85f08e202e280408b
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cc022d3bfdc6685b60396d3e5ef01fcd639162e46eae2acfcd548c1b5128e6c9
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/scriptonite", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.727362871170044,
3
- "best_model_checkpoint": "output/scriptonite/checkpoint-306",
4
- "epoch": 1.0,
5
- "global_step": 306,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -380,11 +380,379 @@
380
  "eval_samples_per_second": 21.336,
381
  "eval_steps_per_second": 2.691,
382
  "step": 306
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
383
  }
384
  ],
385
- "max_steps": 306,
386
- "num_train_epochs": 1,
387
- "total_flos": 319560155136000.0,
388
  "trial_name": null,
389
  "trial_params": null
390
  }
1
  {
2
+ "best_metric": 1.5933494567871094,
3
+ "best_model_checkpoint": "output/scriptonite/checkpoint-608",
4
+ "epoch": 2.0,
5
+ "global_step": 608,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
380
  "eval_samples_per_second": 21.336,
381
  "eval_steps_per_second": 2.691,
382
  "step": 306
383
+ },
384
+ {
385
+ "epoch": 1.02,
386
+ "learning_rate": 1.3182872524144475e-07,
387
+ "loss": 1.7503,
388
+ "step": 310
389
+ },
390
+ {
391
+ "epoch": 1.04,
392
+ "learning_rate": 4.427558891872417e-07,
393
+ "loss": 1.7186,
394
+ "step": 315
395
+ },
396
+ {
397
+ "epoch": 1.05,
398
+ "learning_rate": 9.35614586573241e-07,
399
+ "loss": 1.7355,
400
+ "step": 320
401
+ },
402
+ {
403
+ "epoch": 1.07,
404
+ "learning_rate": 1.6090892340046806e-06,
405
+ "loss": 1.7325,
406
+ "step": 325
407
+ },
408
+ {
409
+ "epoch": 1.09,
410
+ "learning_rate": 2.461382131516746e-06,
411
+ "loss": 1.5602,
412
+ "step": 330
413
+ },
414
+ {
415
+ "epoch": 1.1,
416
+ "learning_rate": 3.4902182611592783e-06,
417
+ "loss": 1.5739,
418
+ "step": 335
419
+ },
420
+ {
421
+ "epoch": 1.12,
422
+ "learning_rate": 4.692851359682261e-06,
423
+ "loss": 1.7024,
424
+ "step": 340
425
+ },
426
+ {
427
+ "epoch": 1.13,
428
+ "learning_rate": 6.066071249112177e-06,
429
+ "loss": 1.7912,
430
+ "step": 345
431
+ },
432
+ {
433
+ "epoch": 1.15,
434
+ "learning_rate": 7.606212405651899e-06,
435
+ "loss": 1.7776,
436
+ "step": 350
437
+ },
438
+ {
439
+ "epoch": 1.17,
440
+ "learning_rate": 9.309163744031197e-06,
441
+ "loss": 1.8068,
442
+ "step": 355
443
+ },
444
+ {
445
+ "epoch": 1.18,
446
+ "learning_rate": 1.1170379591190527e-05,
447
+ "loss": 1.7059,
448
+ "step": 360
449
+ },
450
+ {
451
+ "epoch": 1.2,
452
+ "learning_rate": 1.3184891820006337e-05,
453
+ "loss": 1.7498,
454
+ "step": 365
455
+ },
456
+ {
457
+ "epoch": 1.22,
458
+ "learning_rate": 1.5347323110669033e-05,
459
+ "loss": 1.7696,
460
+ "step": 370
461
+ },
462
+ {
463
+ "epoch": 1.23,
464
+ "learning_rate": 1.7651901304315657e-05,
465
+ "loss": 1.703,
466
+ "step": 375
467
+ },
468
+ {
469
+ "epoch": 1.25,
470
+ "learning_rate": 2.009247481060283e-05,
471
+ "loss": 1.6793,
472
+ "step": 380
473
+ },
474
+ {
475
+ "epoch": 1.27,
476
+ "learning_rate": 2.2662529028092886e-05,
477
+ "loss": 1.7064,
478
+ "step": 385
479
+ },
480
+ {
481
+ "epoch": 1.28,
482
+ "learning_rate": 2.5355203733622272e-05,
483
+ "loss": 1.6889,
484
+ "step": 390
485
+ },
486
+ {
487
+ "epoch": 1.3,
488
+ "learning_rate": 2.816331139423508e-05,
489
+ "loss": 1.7357,
490
+ "step": 395
491
+ },
492
+ {
493
+ "epoch": 1.32,
494
+ "learning_rate": 3.1079356352801514e-05,
495
+ "loss": 1.6548,
496
+ "step": 400
497
+ },
498
+ {
499
+ "epoch": 1.33,
500
+ "learning_rate": 3.4095554836109936e-05,
501
+ "loss": 1.7133,
502
+ "step": 405
503
+ },
504
+ {
505
+ "epoch": 1.35,
506
+ "learning_rate": 3.7203855732024184e-05,
507
+ "loss": 1.6095,
508
+ "step": 410
509
+ },
510
+ {
511
+ "epoch": 1.37,
512
+ "learning_rate": 4.039596208024663e-05,
513
+ "loss": 1.6151,
514
+ "step": 415
515
+ },
516
+ {
517
+ "epoch": 1.38,
518
+ "learning_rate": 4.3663353219321276e-05,
519
+ "loss": 1.6988,
520
+ "step": 420
521
+ },
522
+ {
523
+ "epoch": 1.4,
524
+ "learning_rate": 4.6997307530760796e-05,
525
+ "loss": 1.8323,
526
+ "step": 425
527
+ },
528
+ {
529
+ "epoch": 1.41,
530
+ "learning_rate": 5.038892571958626e-05,
531
+ "loss": 1.6735,
532
+ "step": 430
533
+ },
534
+ {
535
+ "epoch": 1.43,
536
+ "learning_rate": 5.382915456913676e-05,
537
+ "loss": 1.6212,
538
+ "step": 435
539
+ },
540
+ {
541
+ "epoch": 1.45,
542
+ "learning_rate": 5.7308811106741675e-05,
543
+ "loss": 1.7788,
544
+ "step": 440
545
+ },
546
+ {
547
+ "epoch": 1.46,
548
+ "learning_rate": 6.0818607115748475e-05,
549
+ "loss": 1.6847,
550
+ "step": 445
551
+ },
552
+ {
553
+ "epoch": 1.48,
554
+ "learning_rate": 6.434917392847842e-05,
555
+ "loss": 1.6746,
556
+ "step": 450
557
+ },
558
+ {
559
+ "epoch": 1.5,
560
+ "learning_rate": 6.78910874339285e-05,
561
+ "loss": 1.7122,
562
+ "step": 455
563
+ },
564
+ {
565
+ "epoch": 1.51,
566
+ "learning_rate": 7.143489323346855e-05,
567
+ "loss": 1.6207,
568
+ "step": 460
569
+ },
570
+ {
571
+ "epoch": 1.53,
572
+ "learning_rate": 7.497113187738444e-05,
573
+ "loss": 1.719,
574
+ "step": 465
575
+ },
576
+ {
577
+ "epoch": 1.55,
578
+ "learning_rate": 7.849036411490396e-05,
579
+ "loss": 1.814,
580
+ "step": 470
581
+ },
582
+ {
583
+ "epoch": 1.56,
584
+ "learning_rate": 8.19831960903064e-05,
585
+ "loss": 1.7822,
586
+ "step": 475
587
+ },
588
+ {
589
+ "epoch": 1.58,
590
+ "learning_rate": 8.54403044178588e-05,
591
+ "loss": 1.7602,
592
+ "step": 480
593
+ },
594
+ {
595
+ "epoch": 1.6,
596
+ "learning_rate": 8.885246106864805e-05,
597
+ "loss": 1.6178,
598
+ "step": 485
599
+ },
600
+ {
601
+ "epoch": 1.61,
602
+ "learning_rate": 9.221055800287699e-05,
603
+ "loss": 1.7397,
604
+ "step": 490
605
+ },
606
+ {
607
+ "epoch": 1.63,
608
+ "learning_rate": 9.550563148187563e-05,
609
+ "loss": 1.6071,
610
+ "step": 495
611
+ },
612
+ {
613
+ "epoch": 1.64,
614
+ "learning_rate": 9.87288859949296e-05,
615
+ "loss": 1.66,
616
+ "step": 500
617
+ },
618
+ {
619
+ "epoch": 1.66,
620
+ "learning_rate": 0.00010187171773706003,
621
+ "loss": 1.6028,
622
+ "step": 505
623
+ },
624
+ {
625
+ "epoch": 1.68,
626
+ "learning_rate": 0.00010492573757508462,
627
+ "loss": 1.697,
628
+ "step": 510
629
+ },
630
+ {
631
+ "epoch": 1.69,
632
+ "learning_rate": 0.0001078827934406569,
633
+ "loss": 1.5973,
634
+ "step": 515
635
+ },
636
+ {
637
+ "epoch": 1.71,
638
+ "learning_rate": 0.00011073499209051121,
639
+ "loss": 1.6779,
640
+ "step": 520
641
+ },
642
+ {
643
+ "epoch": 1.73,
644
+ "learning_rate": 0.00011347472017582718,
645
+ "loss": 1.639,
646
+ "step": 525
647
+ },
648
+ {
649
+ "epoch": 1.74,
650
+ "learning_rate": 0.00011609466456447531,
651
+ "loss": 1.7209,
652
+ "step": 530
653
+ },
654
+ {
655
+ "epoch": 1.76,
656
+ "learning_rate": 0.00011858783186189604,
657
+ "loss": 1.7896,
658
+ "step": 535
659
+ },
660
+ {
661
+ "epoch": 1.78,
662
+ "learning_rate": 0.00012094756707850673,
663
+ "loss": 1.7093,
664
+ "step": 540
665
+ },
666
+ {
667
+ "epoch": 1.79,
668
+ "learning_rate": 0.00012316757139380674,
669
+ "loss": 1.614,
670
+ "step": 545
671
+ },
672
+ {
673
+ "epoch": 1.81,
674
+ "learning_rate": 0.0001252419189697637,
675
+ "loss": 1.7043,
676
+ "step": 550
677
+ },
678
+ {
679
+ "epoch": 1.83,
680
+ "learning_rate": 0.0001271650727686013,
681
+ "loss": 1.6544,
682
+ "step": 555
683
+ },
684
+ {
685
+ "epoch": 1.84,
686
+ "learning_rate": 0.00012893189933276512,
687
+ "loss": 1.6355,
688
+ "step": 560
689
+ },
690
+ {
691
+ "epoch": 1.86,
692
+ "learning_rate": 0.00013053768248761616,
693
+ "loss": 1.7327,
694
+ "step": 565
695
+ },
696
+ {
697
+ "epoch": 1.88,
698
+ "learning_rate": 0.00013197813593027427,
699
+ "loss": 1.5672,
700
+ "step": 570
701
+ },
702
+ {
703
+ "epoch": 1.89,
704
+ "learning_rate": 0.00013324941467100882,
705
+ "loss": 1.6036,
706
+ "step": 575
707
+ },
708
+ {
709
+ "epoch": 1.91,
710
+ "learning_rate": 0.00013434812529663614,
711
+ "loss": 1.5748,
712
+ "step": 580
713
+ },
714
+ {
715
+ "epoch": 1.92,
716
+ "learning_rate": 0.00013527133502852737,
717
+ "loss": 1.6181,
718
+ "step": 585
719
+ },
720
+ {
721
+ "epoch": 1.94,
722
+ "learning_rate": 0.00013601657955104887,
723
+ "loss": 1.6947,
724
+ "step": 590
725
+ },
726
+ {
727
+ "epoch": 1.96,
728
+ "learning_rate": 0.00013658186958953817,
729
+ "loss": 1.6579,
730
+ "step": 595
731
+ },
732
+ {
733
+ "epoch": 1.97,
734
+ "learning_rate": 0.00013696569622025754,
735
+ "loss": 1.6297,
736
+ "step": 600
737
+ },
738
+ {
739
+ "epoch": 1.99,
740
+ "learning_rate": 0.00013716703489815072,
741
+ "loss": 1.6272,
742
+ "step": 605
743
+ },
744
+ {
745
+ "epoch": 2.0,
746
+ "eval_loss": 1.5933494567871094,
747
+ "eval_runtime": 22.1769,
748
+ "eval_samples_per_second": 20.968,
749
+ "eval_steps_per_second": 2.66,
750
+ "step": 608
751
  }
752
  ],
753
+ "max_steps": 608,
754
+ "num_train_epochs": 2,
755
+ "total_flos": 634286407680000.0,
756
  "trial_name": null,
757
  "trial_params": null
758
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aaaf6708f9300a70cac38b2d9556974db0fe2981edabe48909dbaf43501c3dca
3
  size 2671
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9edd5597a8f5fd166f6b738b741bd4ca0054d7d9cca4c5ca6734b697c0e34cda
3
  size 2671