AlekseyKorshuk committed on
Commit
6e67db2
1 Parent(s): 531a648

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/50-cent")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/20hup4zs/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 50 Cent's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3mjwl9eq) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3mjwl9eq/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/50-cent")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/1awg3ygb/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on 50 Cent's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/ld8pvc1j) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/ld8pvc1j/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
1
  {
2
+ "_name_or_path": "huggingartists/50-cent",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 3.57045841217041, "eval_runtime": 16.4515, "eval_samples_per_second": 22.49, "eval_steps_per_second": 2.857, "epoch": 1.0}
 
1
+ {"eval_loss": 3.3637726306915283, "eval_runtime": 16.4999, "eval_samples_per_second": 20.727, "eval_steps_per_second": 2.606, "epoch": 2.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9827d417fb0de468e6fcc87474696d7324c62371704595e5db2b86387e430d4f
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:358a6f342b56f9de0136e070ea234ddaa8e96700df6535caddb90f83533ad73a
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bcd47e8928507e9fd970c4e3f0d0f823da8245285e0b0218df26a4d69fea66c0
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:54d8c0438e44a5c817661dde8d80235759dd9bf6fd31e60b82390ac67a903164
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba9de888594396c86e2cf894c62dd4e48624d116cf4c441b6dd1a69c5b7e02c9
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39028db0b9207c67d95d8bc7b1634c498f86342adff06c8b1c9d2d4614a45efe
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92dbfd343ee86788257a9f9b700923ca652db2e930c8f12766afb964d94ec657
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1962bee387c556be33f8635e4ecc47b5f5d760f1525f5cf294369a7e3bac39b
3
+ size 14439
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f73ccb458a3740b64f04a91bff674b6289e4a70414fadf4a36dcb72344df060
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56a03ee9c4f7d46c39bb609d5652834cf98daba2d29c096120b392bb7cba62b1
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
 
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/50-cent", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 3.57045841217041,
3
- "best_model_checkpoint": "output/50-cent/checkpoint-258",
4
- "epoch": 1.0,
5
- "global_step": 258,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -320,11 +320,345 @@
320
  "eval_samples_per_second": 22.47,
321
  "eval_steps_per_second": 2.854,
322
  "step": 258
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
323
  }
324
  ],
325
- "max_steps": 258,
326
- "num_train_epochs": 1,
327
- "total_flos": 268869500928000.0,
328
  "trial_name": null,
329
  "trial_params": null
330
  }
 
1
  {
2
+ "best_metric": 3.3637726306915283,
3
+ "best_model_checkpoint": "output/50-cent/checkpoint-522",
4
+ "epoch": 2.0,
5
+ "global_step": 522,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
320
  "eval_samples_per_second": 22.47,
321
  "eval_steps_per_second": 2.854,
322
  "step": 258
323
+ },
324
+ {
325
+ "epoch": 1.0,
326
+ "learning_rate": 4.969441783384187e-09,
327
+ "loss": 3.3342,
328
+ "step": 260
329
+ },
330
+ {
331
+ "epoch": 1.0,
332
+ "eval_loss": 3.3782756328582764,
333
+ "eval_runtime": 16.6767,
334
+ "eval_samples_per_second": 20.508,
335
+ "eval_steps_per_second": 2.578,
336
+ "step": 261
337
+ },
338
+ {
339
+ "epoch": 1.02,
340
+ "learning_rate": 7.949666974663345e-08,
341
+ "loss": 3.4715,
342
+ "step": 265
343
+ },
344
+ {
345
+ "epoch": 1.03,
346
+ "learning_rate": 4.0213613921093164e-07,
347
+ "loss": 3.4357,
348
+ "step": 270
349
+ },
350
+ {
351
+ "epoch": 1.05,
352
+ "learning_rate": 9.717195750166447e-07,
353
+ "loss": 3.3402,
354
+ "step": 275
355
+ },
356
+ {
357
+ "epoch": 1.07,
358
+ "learning_rate": 1.7861845197078197e-06,
359
+ "loss": 3.395,
360
+ "step": 280
361
+ },
362
+ {
363
+ "epoch": 1.09,
364
+ "learning_rate": 2.842581801675534e-06,
365
+ "loss": 3.3568,
366
+ "step": 285
367
+ },
368
+ {
369
+ "epoch": 1.11,
370
+ "learning_rate": 4.137086214086682e-06,
371
+ "loss": 3.6921,
372
+ "step": 290
373
+ },
374
+ {
375
+ "epoch": 1.13,
376
+ "learning_rate": 5.66501036593004e-06,
377
+ "loss": 3.4689,
378
+ "step": 295
379
+ },
380
+ {
381
+ "epoch": 1.15,
382
+ "learning_rate": 7.420821655024756e-06,
383
+ "loss": 3.5794,
384
+ "step": 300
385
+ },
386
+ {
387
+ "epoch": 1.17,
388
+ "learning_rate": 9.39816230153247e-06,
389
+ "loss": 3.4054,
390
+ "step": 305
391
+ },
392
+ {
393
+ "epoch": 1.19,
394
+ "learning_rate": 1.1589872369431459e-05,
395
+ "loss": 3.5207,
396
+ "step": 310
397
+ },
398
+ {
399
+ "epoch": 1.21,
400
+ "learning_rate": 1.3988015692592823e-05,
401
+ "loss": 3.352,
402
+ "step": 315
403
+ },
404
+ {
405
+ "epoch": 1.23,
406
+ "learning_rate": 1.658390861157988e-05,
407
+ "loss": 3.5263,
408
+ "step": 320
409
+ },
410
+ {
411
+ "epoch": 1.25,
412
+ "learning_rate": 1.936815141711555e-05,
413
+ "loss": 3.4251,
414
+ "step": 325
415
+ },
416
+ {
417
+ "epoch": 1.26,
418
+ "learning_rate": 2.2330662386360735e-05,
419
+ "loss": 3.5036,
420
+ "step": 330
421
+ },
422
+ {
423
+ "epoch": 1.28,
424
+ "learning_rate": 2.5460714288759305e-05,
425
+ "loss": 3.6056,
426
+ "step": 335
427
+ },
428
+ {
429
+ "epoch": 1.3,
430
+ "learning_rate": 2.8746973229261208e-05,
431
+ "loss": 3.4339,
432
+ "step": 340
433
+ },
434
+ {
435
+ "epoch": 1.32,
436
+ "learning_rate": 3.2177539688273746e-05,
437
+ "loss": 3.2688,
438
+ "step": 345
439
+ },
440
+ {
441
+ "epoch": 1.34,
442
+ "learning_rate": 3.5739991609734934e-05,
443
+ "loss": 3.6447,
444
+ "step": 350
445
+ },
446
+ {
447
+ "epoch": 1.36,
448
+ "learning_rate": 3.9421429381287695e-05,
449
+ "loss": 3.4024,
450
+ "step": 355
451
+ },
452
+ {
453
+ "epoch": 1.38,
454
+ "learning_rate": 4.320852254368187e-05,
455
+ "loss": 3.332,
456
+ "step": 360
457
+ },
458
+ {
459
+ "epoch": 1.4,
460
+ "learning_rate": 4.7087558060269536e-05,
461
+ "loss": 3.4638,
462
+ "step": 365
463
+ },
464
+ {
465
+ "epoch": 1.42,
466
+ "learning_rate": 5.1044489971810725e-05,
467
+ "loss": 3.5294,
468
+ "step": 370
469
+ },
470
+ {
471
+ "epoch": 1.44,
472
+ "learning_rate": 5.506499025678891e-05,
473
+ "loss": 3.4554,
474
+ "step": 375
475
+ },
476
+ {
477
+ "epoch": 1.46,
478
+ "learning_rate": 5.9134500713072235e-05,
479
+ "loss": 3.2164,
480
+ "step": 380
481
+ },
482
+ {
483
+ "epoch": 1.48,
484
+ "learning_rate": 6.323828567305678e-05,
485
+ "loss": 3.5425,
486
+ "step": 385
487
+ },
488
+ {
489
+ "epoch": 1.49,
490
+ "learning_rate": 6.736148536141151e-05,
491
+ "loss": 3.3742,
492
+ "step": 390
493
+ },
494
+ {
495
+ "epoch": 1.51,
496
+ "learning_rate": 7.148916970221591e-05,
497
+ "loss": 3.4436,
498
+ "step": 395
499
+ },
500
+ {
501
+ "epoch": 1.53,
502
+ "learning_rate": 7.560639238065579e-05,
503
+ "loss": 3.4376,
504
+ "step": 400
505
+ },
506
+ {
507
+ "epoch": 1.55,
508
+ "learning_rate": 7.969824496351964e-05,
509
+ "loss": 3.5896,
510
+ "step": 405
511
+ },
512
+ {
513
+ "epoch": 1.57,
514
+ "learning_rate": 8.374991088252677e-05,
515
+ "loss": 3.4729,
516
+ "step": 410
517
+ },
518
+ {
519
+ "epoch": 1.59,
520
+ "learning_rate": 8.774671908501242e-05,
521
+ "loss": 3.4136,
522
+ "step": 415
523
+ },
524
+ {
525
+ "epoch": 1.61,
526
+ "learning_rate": 9.1674197157702e-05,
527
+ "loss": 3.475,
528
+ "step": 420
529
+ },
530
+ {
531
+ "epoch": 1.63,
532
+ "learning_rate": 9.551812373121417e-05,
533
+ "loss": 3.3729,
534
+ "step": 425
535
+ },
536
+ {
537
+ "epoch": 1.65,
538
+ "learning_rate": 9.926457997553504e-05,
539
+ "loss": 3.5851,
540
+ "step": 430
541
+ },
542
+ {
543
+ "epoch": 1.67,
544
+ "learning_rate": 0.00010290000000000001,
545
+ "loss": 3.4967,
546
+ "step": 435
547
+ },
548
+ {
549
+ "epoch": 1.69,
550
+ "learning_rate": 0.0001064112199752845,
551
+ "loss": 3.3427,
552
+ "step": 440
553
+ },
554
+ {
555
+ "epoch": 1.7,
556
+ "learning_rate": 0.0001097855257995339,
557
+ "loss": 3.5208,
558
+ "step": 445
559
+ },
560
+ {
561
+ "epoch": 1.72,
562
+ "learning_rate": 0.00011301069913603334,
563
+ "loss": 3.4049,
564
+ "step": 450
565
+ },
566
+ {
567
+ "epoch": 1.74,
568
+ "learning_rate": 0.00011607506165571554,
569
+ "loss": 3.452,
570
+ "step": 455
571
+ },
572
+ {
573
+ "epoch": 1.76,
574
+ "learning_rate": 0.00011896751732430487,
575
+ "loss": 3.5148,
576
+ "step": 460
577
+ },
578
+ {
579
+ "epoch": 1.78,
580
+ "learning_rate": 0.00012167759258097654,
581
+ "loss": 3.3147,
582
+ "step": 465
583
+ },
584
+ {
585
+ "epoch": 1.8,
586
+ "learning_rate": 0.00012419547426304373,
587
+ "loss": 3.38,
588
+ "step": 470
589
+ },
590
+ {
591
+ "epoch": 1.82,
592
+ "learning_rate": 0.00012651204513934757,
593
+ "loss": 3.3329,
594
+ "step": 475
595
+ },
596
+ {
597
+ "epoch": 1.84,
598
+ "learning_rate": 0.00012861891692368509,
599
+ "loss": 3.5188,
600
+ "step": 480
601
+ },
602
+ {
603
+ "epoch": 1.86,
604
+ "learning_rate": 0.00013050846064873163,
605
+ "loss": 3.4628,
606
+ "step": 485
607
+ },
608
+ {
609
+ "epoch": 1.88,
610
+ "learning_rate": 0.0001321738342904763,
611
+ "loss": 3.5377,
612
+ "step": 490
613
+ },
614
+ {
615
+ "epoch": 1.9,
616
+ "learning_rate": 0.00013360900754314024,
617
+ "loss": 3.4683,
618
+ "step": 495
619
+ },
620
+ {
621
+ "epoch": 1.92,
622
+ "learning_rate": 0.00013480878365487042,
623
+ "loss": 3.4532,
624
+ "step": 500
625
+ },
626
+ {
627
+ "epoch": 1.93,
628
+ "learning_rate": 0.00013576881824513962,
629
+ "loss": 3.5582,
630
+ "step": 505
631
+ },
632
+ {
633
+ "epoch": 1.95,
634
+ "learning_rate": 0.00013648563503571674,
635
+ "loss": 3.5528,
636
+ "step": 510
637
+ },
638
+ {
639
+ "epoch": 1.97,
640
+ "learning_rate": 0.00013695663843824482,
641
+ "loss": 3.4235,
642
+ "step": 515
643
+ },
644
+ {
645
+ "epoch": 1.99,
646
+ "learning_rate": 0.00013718012295284757,
647
+ "loss": 3.5739,
648
+ "step": 520
649
+ },
650
+ {
651
+ "epoch": 2.0,
652
+ "eval_loss": 3.3637726306915283,
653
+ "eval_runtime": 16.4835,
654
+ "eval_samples_per_second": 20.748,
655
+ "eval_steps_per_second": 2.609,
656
+ "step": 522
657
  }
658
  ],
659
+ "max_steps": 522,
660
+ "num_train_epochs": 2,
661
+ "total_flos": 544271302656000.0,
662
  "trial_name": null,
663
  "trial_params": null
664
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0338d38146cc715ce37a1415c1593b31576abfbd8eef8f32a19e914cd42011fa
3
  size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e6f290bf43c68f84d50f113a11b20c55809e90aac8aa7e58b408f5dbe3f578ab
3
  size 2671