AlekseyKorshuk commited on
Commit
ce06b5e
1 Parent(s): 272cc03

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/bob-dylan")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/12pi332s/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Bob Dylan's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/34f7xa4s) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/34f7xa4s/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
45
  dataset = load_dataset("huggingartists/bob-dylan")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/31a7e0lm/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Bob Dylan's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1h7wqver) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1h7wqver/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "huggingartists/bob-dylan",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -36,7 +36,7 @@
36
  }
37
  },
38
  "torch_dtype": "float32",
39
- "transformers_version": "4.12.3",
40
  "use_cache": true,
41
  "vocab_size": 50257
42
  }
1
  {
2
+ "_name_or_path": "bob-dylan",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
36
  }
37
  },
38
  "torch_dtype": "float32",
39
+ "transformers_version": "4.19.2",
40
  "use_cache": true,
41
  "vocab_size": 50257
42
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.7831153869628906, "eval_runtime": 20.4767, "eval_samples_per_second": 21.927, "eval_steps_per_second": 2.784, "epoch": 11.0}
1
+ {"eval_loss": 1.1156859397888184, "eval_runtime": 5.2897, "eval_samples_per_second": 82.046, "eval_steps_per_second": 10.398, "epoch": 11.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5455703e728e3cd6ffaf593b10aa5cf2233c7023ca11e9c24155e6a409f30a46
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:52eb735612f0abe86a36c99bbc88e4b736d213924b487ddc439a7fda4f3738ba
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0b57dd3c115876c0c7ea6456370527f6da21f3a6c9135e1da512b9308141a7e4
3
  size 995604017
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20df6e4328ecf349bf08cc74a4faa3ceabf0373ff1ce5c11ee4657c56c5ebe05
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:32d33cb397d94e7ed9e2034a93b3d7fc4a7c2474a08659ab20f0ff40914ebf69
3
- size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6cbda632eba71ca8a5a6d2b2a32f60a0e0d89f0b2b5f27757234f2f9dea5b2bc
3
+ size 510396521
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bb5c4b938e52242f003d2aafdabeacb8b2645526444936a3f4c1786f434041a
3
  size 14567
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7446d5500cdd6761e0d9b127f879a785bc53369d1cd3923b64bfed4fdcf6b5a3
3
  size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b7edfdb51a39bca06643e38f8438d076a87c514b98497804d576bdb518cb004
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07ca2fdd8c3e336181f82585738bd2cd39530e31bea6189b6d35d926f6c48442
3
  size 623
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.7831153869628906,
3
- "best_model_checkpoint": "output/bob-dylan/checkpoint-3180",
4
- "epoch": 10.0,
5
- "global_step": 3180,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -3902,11 +3902,435 @@
3902
  "eval_samples_per_second": 22.062,
3903
  "eval_steps_per_second": 2.801,
3904
  "step": 3180
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3905
  }
3906
  ],
3907
- "max_steps": 3498,
3908
  "num_train_epochs": 11,
3909
- "total_flos": 3314620071936000.0,
3910
  "trial_name": null,
3911
  "trial_params": null
3912
  }
1
  {
2
+ "best_metric": 1.1156859397888184,
3
+ "best_model_checkpoint": "output/bob-dylan/checkpoint-3520",
4
+ "epoch": 11.0,
5
+ "global_step": 3520,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
3902
  "eval_samples_per_second": 22.062,
3903
  "eval_steps_per_second": 2.801,
3904
  "step": 3180
3905
+ },
3906
+ {
3907
+ "epoch": 9.95,
3908
+ "learning_rate": 0.00013645750858358395,
3909
+ "loss": 1.2433,
3910
+ "step": 3185
3911
+ },
3912
+ {
3913
+ "epoch": 9.97,
3914
+ "learning_rate": 0.0001368696722497127,
3915
+ "loss": 1.547,
3916
+ "step": 3190
3917
+ },
3918
+ {
3919
+ "epoch": 9.98,
3920
+ "learning_rate": 0.00013711736829567482,
3921
+ "loss": 1.4594,
3922
+ "step": 3195
3923
+ },
3924
+ {
3925
+ "epoch": 10.0,
3926
+ "learning_rate": 0.0001372,
3927
+ "loss": 1.3407,
3928
+ "step": 3200
3929
+ },
3930
+ {
3931
+ "epoch": 10.0,
3932
+ "eval_loss": 1.139600157737732,
3933
+ "eval_runtime": 5.2723,
3934
+ "eval_samples_per_second": 82.317,
3935
+ "eval_steps_per_second": 10.432,
3936
+ "step": 3200
3937
+ },
3938
+ {
3939
+ "epoch": 10.02,
3940
+ "learning_rate": 0.00013711736829567482,
3941
+ "loss": 1.4415,
3942
+ "step": 3205
3943
+ },
3944
+ {
3945
+ "epoch": 10.03,
3946
+ "learning_rate": 0.00013686967224971273,
3947
+ "loss": 1.2348,
3948
+ "step": 3210
3949
+ },
3950
+ {
3951
+ "epoch": 10.05,
3952
+ "learning_rate": 0.00013645750858358398,
3953
+ "loss": 1.4623,
3954
+ "step": 3215
3955
+ },
3956
+ {
3957
+ "epoch": 10.06,
3958
+ "learning_rate": 0.00013588187023566163,
3959
+ "loss": 1.437,
3960
+ "step": 3220
3961
+ },
3962
+ {
3963
+ "epoch": 10.08,
3964
+ "learning_rate": 0.00013514414396914573,
3965
+ "loss": 1.6916,
3966
+ "step": 3225
3967
+ },
3968
+ {
3969
+ "epoch": 10.09,
3970
+ "learning_rate": 0.00013424610703122958,
3971
+ "loss": 1.7023,
3972
+ "step": 3230
3973
+ },
3974
+ {
3975
+ "epoch": 10.11,
3976
+ "learning_rate": 0.00013318992287155525,
3977
+ "loss": 1.3172,
3978
+ "step": 3235
3979
+ },
3980
+ {
3981
+ "epoch": 10.12,
3982
+ "learning_rate": 0.00013197813593027435,
3983
+ "loss": 1.2053,
3984
+ "step": 3240
3985
+ },
3986
+ {
3987
+ "epoch": 10.14,
3988
+ "learning_rate": 0.00013061366550826825,
3989
+ "loss": 1.1869,
3990
+ "step": 3245
3991
+ },
3992
+ {
3993
+ "epoch": 10.16,
3994
+ "learning_rate": 0.00012909979873429724,
3995
+ "loss": 1.2981,
3996
+ "step": 3250
3997
+ },
3998
+ {
3999
+ "epoch": 10.17,
4000
+ "learning_rate": 0.0001274401826460187,
4001
+ "loss": 1.6608,
4002
+ "step": 3255
4003
+ },
4004
+ {
4005
+ "epoch": 10.19,
4006
+ "learning_rate": 0.00012563881540395474,
4007
+ "loss": 1.3115,
4008
+ "step": 3260
4009
+ },
4010
+ {
4011
+ "epoch": 10.2,
4012
+ "learning_rate": 0.00012370003665957216,
4013
+ "loss": 1.2824,
4014
+ "step": 3265
4015
+ },
4016
+ {
4017
+ "epoch": 10.22,
4018
+ "learning_rate": 0.00012162851710068375,
4019
+ "loss": 1.4082,
4020
+ "step": 3270
4021
+ },
4022
+ {
4023
+ "epoch": 10.23,
4024
+ "learning_rate": 0.00011942924719935029,
4025
+ "loss": 1.3048,
4026
+ "step": 3275
4027
+ },
4028
+ {
4029
+ "epoch": 10.25,
4030
+ "learning_rate": 0.00011710752518939736,
4031
+ "loss": 1.3276,
4032
+ "step": 3280
4033
+ },
4034
+ {
4035
+ "epoch": 10.27,
4036
+ "learning_rate": 0.0001146689443025054,
4037
+ "loss": 1.4064,
4038
+ "step": 3285
4039
+ },
4040
+ {
4041
+ "epoch": 10.28,
4042
+ "learning_rate": 0.00011211937929362613,
4043
+ "loss": 1.2408,
4044
+ "step": 3290
4045
+ },
4046
+ {
4047
+ "epoch": 10.3,
4048
+ "learning_rate": 0.00010946497228818107,
4049
+ "loss": 1.3932,
4050
+ "step": 3295
4051
+ },
4052
+ {
4053
+ "epoch": 10.31,
4054
+ "learning_rate": 0.00010671211798514499,
4055
+ "loss": 1.4576,
4056
+ "step": 3300
4057
+ },
4058
+ {
4059
+ "epoch": 10.33,
4060
+ "learning_rate": 0.00010386744825165496,
4061
+ "loss": 1.455,
4062
+ "step": 3305
4063
+ },
4064
+ {
4065
+ "epoch": 10.34,
4066
+ "learning_rate": 0.00010093781614626351,
4067
+ "loss": 1.3289,
4068
+ "step": 3310
4069
+ },
4070
+ {
4071
+ "epoch": 10.36,
4072
+ "learning_rate": 9.793027940931756e-05,
4073
+ "loss": 1.2645,
4074
+ "step": 3315
4075
+ },
4076
+ {
4077
+ "epoch": 10.38,
4078
+ "learning_rate": 9.485208346024504e-05,
4079
+ "loss": 1.39,
4080
+ "step": 3320
4081
+ },
4082
+ {
4083
+ "epoch": 10.39,
4084
+ "learning_rate": 9.17106439427063e-05,
4085
+ "loss": 1.3945,
4086
+ "step": 3325
4087
+ },
4088
+ {
4089
+ "epoch": 10.41,
4090
+ "learning_rate": 8.851352885965625e-05,
4091
+ "loss": 1.5375,
4092
+ "step": 3330
4093
+ },
4094
+ {
4095
+ "epoch": 10.42,
4096
+ "learning_rate": 8.526844034136417e-05,
4097
+ "loss": 1.4077,
4098
+ "step": 3335
4099
+ },
4100
+ {
4101
+ "epoch": 10.44,
4102
+ "learning_rate": 8.198319609030632e-05,
4103
+ "loss": 1.4331,
4104
+ "step": 3340
4105
+ },
4106
+ {
4107
+ "epoch": 10.45,
4108
+ "learning_rate": 7.866571054763788e-05,
4109
+ "loss": 1.8602,
4110
+ "step": 3345
4111
+ },
4112
+ {
4113
+ "epoch": 10.47,
4114
+ "learning_rate": 7.532397582660805e-05,
4115
+ "loss": 1.4865,
4116
+ "step": 3350
4117
+ },
4118
+ {
4119
+ "epoch": 10.48,
4120
+ "learning_rate": 7.19660424588612e-05,
4121
+ "loss": 1.2815,
4122
+ "step": 3355
4123
+ },
4124
+ {
4125
+ "epoch": 10.5,
4126
+ "learning_rate": 6.859999999999997e-05,
4127
+ "loss": 1.4705,
4128
+ "step": 3360
4129
+ },
4130
+ {
4131
+ "epoch": 10.52,
4132
+ "learning_rate": 6.523395754113922e-05,
4133
+ "loss": 1.1969,
4134
+ "step": 3365
4135
+ },
4136
+ {
4137
+ "epoch": 10.53,
4138
+ "learning_rate": 6.187602417339237e-05,
4139
+ "loss": 1.4564,
4140
+ "step": 3370
4141
+ },
4142
+ {
4143
+ "epoch": 10.55,
4144
+ "learning_rate": 5.853428945236207e-05,
4145
+ "loss": 1.4113,
4146
+ "step": 3375
4147
+ },
4148
+ {
4149
+ "epoch": 10.56,
4150
+ "learning_rate": 5.521680390969362e-05,
4151
+ "loss": 1.4642,
4152
+ "step": 3380
4153
+ },
4154
+ {
4155
+ "epoch": 10.58,
4156
+ "learning_rate": 5.193155965863624e-05,
4157
+ "loss": 1.4196,
4158
+ "step": 3385
4159
+ },
4160
+ {
4161
+ "epoch": 10.59,
4162
+ "learning_rate": 4.8686471140344147e-05,
4163
+ "loss": 1.3666,
4164
+ "step": 3390
4165
+ },
4166
+ {
4167
+ "epoch": 10.61,
4168
+ "learning_rate": 4.548935605729363e-05,
4169
+ "loss": 1.3908,
4170
+ "step": 3395
4171
+ },
4172
+ {
4173
+ "epoch": 10.62,
4174
+ "learning_rate": 4.23479165397549e-05,
4175
+ "loss": 1.4785,
4176
+ "step": 3400
4177
+ },
4178
+ {
4179
+ "epoch": 10.64,
4180
+ "learning_rate": 3.926972059068282e-05,
4181
+ "loss": 1.4775,
4182
+ "step": 3405
4183
+ },
4184
+ {
4185
+ "epoch": 10.66,
4186
+ "learning_rate": 3.626218385373685e-05,
4187
+ "loss": 1.4841,
4188
+ "step": 3410
4189
+ },
4190
+ {
4191
+ "epoch": 10.67,
4192
+ "learning_rate": 3.333255174834496e-05,
4193
+ "loss": 1.4263,
4194
+ "step": 3415
4195
+ },
4196
+ {
4197
+ "epoch": 10.69,
4198
+ "learning_rate": 3.0487882014855373e-05,
4199
+ "loss": 1.4815,
4200
+ "step": 3420
4201
+ },
4202
+ {
4203
+ "epoch": 10.7,
4204
+ "learning_rate": 2.7735027711819264e-05,
4205
+ "loss": 1.3612,
4206
+ "step": 3425
4207
+ },
4208
+ {
4209
+ "epoch": 10.72,
4210
+ "learning_rate": 2.508062070637383e-05,
4211
+ "loss": 1.3586,
4212
+ "step": 3430
4213
+ },
4214
+ {
4215
+ "epoch": 10.73,
4216
+ "learning_rate": 2.253105569749455e-05,
4217
+ "loss": 1.4036,
4218
+ "step": 3435
4219
+ },
4220
+ {
4221
+ "epoch": 10.75,
4222
+ "learning_rate": 2.0092474810602945e-05,
4223
+ "loss": 1.2455,
4224
+ "step": 3440
4225
+ },
4226
+ {
4227
+ "epoch": 10.77,
4228
+ "learning_rate": 1.7770752800649997e-05,
4229
+ "loss": 1.3747,
4230
+ "step": 3445
4231
+ },
4232
+ {
4233
+ "epoch": 10.78,
4234
+ "learning_rate": 1.5571482899316204e-05,
4235
+ "loss": 1.2848,
4236
+ "step": 3450
4237
+ },
4238
+ {
4239
+ "epoch": 10.8,
4240
+ "learning_rate": 1.3499963340427795e-05,
4241
+ "loss": 1.5623,
4242
+ "step": 3455
4243
+ },
4244
+ {
4245
+ "epoch": 10.81,
4246
+ "learning_rate": 1.1561184596045504e-05,
4247
+ "loss": 1.4704,
4248
+ "step": 3460
4249
+ },
4250
+ {
4251
+ "epoch": 10.83,
4252
+ "learning_rate": 9.759817353981509e-06,
4253
+ "loss": 1.3271,
4254
+ "step": 3465
4255
+ },
4256
+ {
4257
+ "epoch": 10.84,
4258
+ "learning_rate": 8.100201265702836e-06,
4259
+ "loss": 1.2696,
4260
+ "step": 3470
4261
+ },
4262
+ {
4263
+ "epoch": 10.86,
4264
+ "learning_rate": 6.586334491731833e-06,
4265
+ "loss": 1.5138,
4266
+ "step": 3475
4267
+ },
4268
+ {
4269
+ "epoch": 10.88,
4270
+ "learning_rate": 5.221864069725821e-06,
4271
+ "loss": 1.344,
4272
+ "step": 3480
4273
+ },
4274
+ {
4275
+ "epoch": 10.89,
4276
+ "learning_rate": 4.010077128444735e-06,
4277
+ "loss": 1.3544,
4278
+ "step": 3485
4279
+ },
4280
+ {
4281
+ "epoch": 10.91,
4282
+ "learning_rate": 2.9538929687704825e-06,
4283
+ "loss": 1.6602,
4284
+ "step": 3490
4285
+ },
4286
+ {
4287
+ "epoch": 10.92,
4288
+ "learning_rate": 2.0558560308543213e-06,
4289
+ "loss": 1.3761,
4290
+ "step": 3495
4291
+ },
4292
+ {
4293
+ "epoch": 10.94,
4294
+ "learning_rate": 1.3181297643384459e-06,
4295
+ "loss": 1.3709,
4296
+ "step": 3500
4297
+ },
4298
+ {
4299
+ "epoch": 10.95,
4300
+ "learning_rate": 7.424914164160148e-07,
4301
+ "loss": 1.3595,
4302
+ "step": 3505
4303
+ },
4304
+ {
4305
+ "epoch": 10.97,
4306
+ "learning_rate": 3.303277502872983e-07,
4307
+ "loss": 1.4077,
4308
+ "step": 3510
4309
+ },
4310
+ {
4311
+ "epoch": 10.98,
4312
+ "learning_rate": 8.263170432518063e-08,
4313
+ "loss": 1.4356,
4314
+ "step": 3515
4315
+ },
4316
+ {
4317
+ "epoch": 11.0,
4318
+ "learning_rate": 0.0,
4319
+ "loss": 1.7243,
4320
+ "step": 3520
4321
+ },
4322
+ {
4323
+ "epoch": 11.0,
4324
+ "eval_loss": 1.1156859397888184,
4325
+ "eval_runtime": 5.2715,
4326
+ "eval_samples_per_second": 82.33,
4327
+ "eval_steps_per_second": 10.433,
4328
+ "step": 3520
4329
  }
4330
  ],
4331
+ "max_steps": 3520,
4332
  "num_train_epochs": 11,
4333
+ "total_flos": 3668148191232000.0,
4334
  "trial_name": null,
4335
  "trial_params": null
4336
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ca4b64b447bcbc63fee889d7ebeafbf9eae6a003aa7f37b487c1df50308adc16
3
- size 2863
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8998c8154106cd43a7d424edf953518beb4d146ebea8364f94c30b8bca6902f7
3
+ size 3247