AlekseyKorshuk commited on
Commit
7495459
1 Parent(s): cf5a035

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/pharaoh")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2j2p1ulk/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on PHARAOH's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/1dgmwwm3) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/1dgmwwm3/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/pharaoh")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/3kao1xf5/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on PHARAOH's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/xsxegmjq) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/xsxegmjq/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -35,7 +35,7 @@
35
  }
36
  },
37
  "torch_dtype": "float32",
38
- "transformers_version": "4.10.0",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
 
1
  {
2
+ "_name_or_path": "huggingartists/pharaoh",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
 
35
  }
36
  },
37
  "torch_dtype": "float32",
38
+ "transformers_version": "4.11.2",
39
  "use_cache": true,
40
  "vocab_size": 50257
41
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.699326515197754, "eval_runtime": 10.4841, "eval_samples_per_second": 22.606, "eval_steps_per_second": 2.861, "epoch": 1.0}
 
1
+ {"eval_loss": 1.7224986553192139, "eval_runtime": 22.1306, "eval_samples_per_second": 22.548, "eval_steps_per_second": 2.847, "epoch": 2.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:26cbcf0420766236c89194181a5733b514b4c264cdea2f09855e9306bfd740c7
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a2b4c5319db04b9872d2055b5b79d1b4807f1c3915b4626047165c4d8e7d4ec
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe1daefc1a60e30524b28874d4be522c1e80a4766d1a84d1d30ff9599aabf714
3
- size 995603825
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:26bbacfce71425f128a87fdb7029463499be8bc995eb14eb839740e92b47317a
3
+ size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f77e516fe0382ad6598763db798fe469510208becc1e25a055700e3eee0fb6f
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bff62d0348d173b28829e07fec717386e9eef99e506d12c023aa35c55239ae7d
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f77f96ae7ce45ce31ca4af2726b72623ac2be13cdb55ba1d562b78afb740e564
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9da0f194b462662f09bef1ea47010e6352ae99c4cb094909f982d2deeb47ae8b
3
+ size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48b660f4750a1868cd1a9877f5c933f8f45093566a7accb3c6c62e58f34d678f
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ed819f11a0f4645df58a01f50e8c26eb756310ea7f77b118bb1ec57797118f8
3
  size 623
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
 
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/pharaoh", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,264 +1,222 @@
1
  {
2
- "best_metric": 1.699326515197754,
3
- "best_model_checkpoint": "output/pharaoh/checkpoint-200",
4
  "epoch": 1.0,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.03,
12
- "learning_rate": 0.0001369885290940926,
13
- "loss": 2.5312,
14
  "step": 5
15
  },
16
  {
17
- "epoch": 0.05,
18
- "learning_rate": 0.00013635542016482644,
19
- "loss": 2.1558,
20
  "step": 10
21
  },
22
  {
23
- "epoch": 0.07,
24
- "learning_rate": 0.0001353045765392806,
25
- "loss": 2.1677,
26
  "step": 15
27
  },
28
  {
29
- "epoch": 0.1,
30
- "learning_rate": 0.00013384247701784754,
31
- "loss": 2.0874,
32
  "step": 20
33
  },
34
  {
35
- "epoch": 0.12,
36
- "learning_rate": 0.00013197813593027427,
37
- "loss": 2.0224,
38
  "step": 25
39
  },
40
  {
41
- "epoch": 0.15,
42
- "learning_rate": 0.00012972304755932203,
43
- "loss": 1.9402,
44
  "step": 30
45
  },
46
  {
47
- "epoch": 0.17,
48
- "learning_rate": 0.00012709111527469072,
49
- "loss": 2.113,
50
  "step": 35
51
  },
52
  {
53
- "epoch": 0.2,
54
- "learning_rate": 0.0001240985658141214,
55
- "loss": 2.0482,
56
  "step": 40
57
  },
58
  {
59
- "epoch": 0.23,
60
- "learning_rate": 0.00012076384924016213,
61
- "loss": 1.843,
62
  "step": 45
63
  },
64
  {
65
- "epoch": 0.25,
66
- "learning_rate": 0.00011710752518939715,
67
- "loss": 2.0109,
68
  "step": 50
69
  },
70
  {
71
- "epoch": 0.28,
72
- "learning_rate": 0.00011315213611545058,
73
- "loss": 1.9608,
74
  "step": 55
75
  },
76
  {
77
- "epoch": 0.3,
78
- "learning_rate": 0.00010892206830726365,
79
- "loss": 2.053,
80
  "step": 60
81
  },
82
  {
83
- "epoch": 0.33,
84
- "learning_rate": 0.00010444340153951408,
85
- "loss": 1.9682,
86
  "step": 65
87
  },
88
  {
89
- "epoch": 0.35,
90
- "learning_rate": 9.974374828213291e-05,
91
- "loss": 1.9954,
92
  "step": 70
93
  },
94
  {
95
- "epoch": 0.38,
96
- "learning_rate": 9.485208346024516e-05,
97
- "loss": 1.9485,
98
  "step": 75
99
  },
100
  {
101
- "epoch": 0.4,
102
- "learning_rate": 8.97985658141214e-05,
103
- "loss": 1.8621,
104
  "step": 80
105
  },
106
  {
107
- "epoch": 0.42,
108
- "learning_rate": 8.461435196051512e-05,
109
- "loss": 1.9375,
110
  "step": 85
111
  },
112
  {
113
- "epoch": 0.45,
114
- "learning_rate": 7.933140430175984e-05,
115
- "loss": 1.8682,
116
  "step": 90
117
  },
118
  {
119
- "epoch": 0.47,
120
- "learning_rate": 7.398229396693017e-05,
121
- "loss": 1.9141,
122
  "step": 95
123
  },
124
  {
125
- "epoch": 0.5,
126
- "learning_rate": 6.86e-05,
127
- "loss": 1.8811,
128
  "step": 100
129
  },
130
  {
131
- "epoch": 0.53,
132
- "learning_rate": 6.321770603306985e-05,
133
- "loss": 1.9623,
134
  "step": 105
135
  },
136
  {
137
- "epoch": 0.55,
138
- "learning_rate": 5.786859569824015e-05,
139
- "loss": 1.8849,
140
  "step": 110
141
  },
142
  {
143
- "epoch": 0.57,
144
- "learning_rate": 5.258564803948491e-05,
145
- "loss": 1.891,
146
  "step": 115
147
  },
148
  {
149
- "epoch": 0.6,
150
- "learning_rate": 4.740143418587861e-05,
151
- "loss": 1.7659,
152
  "step": 120
153
  },
154
  {
155
- "epoch": 0.62,
156
- "learning_rate": 4.2347916539754844e-05,
157
- "loss": 1.8337,
158
  "step": 125
159
  },
160
  {
161
- "epoch": 0.65,
162
- "learning_rate": 3.745625171786709e-05,
163
- "loss": 1.8855,
164
  "step": 130
165
  },
166
  {
167
- "epoch": 0.68,
168
- "learning_rate": 3.275659846048591e-05,
169
- "loss": 1.8395,
170
  "step": 135
171
  },
172
  {
173
- "epoch": 0.7,
174
- "learning_rate": 2.8277931692736352e-05,
175
- "loss": 1.8782,
176
  "step": 140
177
  },
178
  {
179
- "epoch": 0.72,
180
- "learning_rate": 2.4047863884549408e-05,
181
- "loss": 1.8944,
182
  "step": 145
183
  },
184
  {
185
- "epoch": 0.75,
186
- "learning_rate": 2.0092474810602843e-05,
187
- "loss": 1.913,
188
  "step": 150
189
  },
190
  {
191
- "epoch": 0.78,
192
- "learning_rate": 1.6436150759837878e-05,
193
- "loss": 1.7538,
194
  "step": 155
195
  },
196
  {
197
- "epoch": 0.8,
198
- "learning_rate": 1.3101434185878613e-05,
199
- "loss": 1.9117,
200
  "step": 160
201
  },
202
  {
203
- "epoch": 0.82,
204
- "learning_rate": 1.0108884725309277e-05,
205
- "loss": 1.8817,
206
  "step": 165
207
  },
208
- {
209
- "epoch": 0.85,
210
- "learning_rate": 7.476952440677969e-06,
211
- "loss": 1.8265,
212
- "step": 170
213
- },
214
- {
215
- "epoch": 0.88,
216
- "learning_rate": 5.22186406972573e-06,
217
- "loss": 1.7538,
218
- "step": 175
219
- },
220
- {
221
- "epoch": 0.9,
222
- "learning_rate": 3.357522982152468e-06,
223
- "loss": 1.7725,
224
- "step": 180
225
- },
226
- {
227
- "epoch": 0.93,
228
- "learning_rate": 1.8954234607193881e-06,
229
- "loss": 1.8434,
230
- "step": 185
231
- },
232
- {
233
- "epoch": 0.95,
234
- "learning_rate": 8.445798351735566e-07,
235
- "loss": 1.7535,
236
- "step": 190
237
- },
238
- {
239
- "epoch": 0.97,
240
- "learning_rate": 2.1147090590742169e-07,
241
- "loss": 1.8311,
242
- "step": 195
243
- },
244
- {
245
- "epoch": 1.0,
246
- "learning_rate": 0.0,
247
- "loss": 1.862,
248
- "step": 200
249
- },
250
  {
251
  "epoch": 1.0,
252
- "eval_loss": 1.699326515197754,
253
- "eval_runtime": 10.3078,
254
- "eval_samples_per_second": 22.992,
255
- "eval_steps_per_second": 2.91,
256
- "step": 200
257
  }
258
  ],
259
- "max_steps": 200,
260
- "num_train_epochs": 1,
261
- "total_flos": 208772333568000.0,
262
  "trial_name": null,
263
  "trial_params": null
264
  }
 
1
  {
2
+ "best_metric": 1.7224986553192139,
3
+ "best_model_checkpoint": "output/pharaoh/checkpoint-167",
4
  "epoch": 1.0,
5
+ "global_step": 167,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
  "epoch": 0.03,
12
+ "learning_rate": 0.00013689676402972123,
13
+ "loss": 1.6111,
14
  "step": 5
15
  },
16
  {
17
+ "epoch": 0.06,
18
+ "learning_rate": 0.0001359897369367762,
19
+ "loss": 1.6407,
20
  "step": 10
21
  },
22
  {
23
+ "epoch": 0.09,
24
+ "learning_rate": 0.00013448693747453513,
25
+ "loss": 1.7296,
26
  "step": 15
27
  },
28
  {
29
+ "epoch": 0.12,
30
+ "learning_rate": 0.0001324016514404635,
31
+ "loss": 1.6351,
32
  "step": 20
33
  },
34
  {
35
+ "epoch": 0.15,
36
+ "learning_rate": 0.00012975231422038792,
37
+ "loss": 1.6431,
38
  "step": 25
39
  },
40
  {
41
+ "epoch": 0.18,
42
+ "learning_rate": 0.0001265623478068018,
43
+ "loss": 1.5934,
44
  "step": 30
45
  },
46
  {
47
+ "epoch": 0.21,
48
+ "learning_rate": 0.00012285995373208355,
49
+ "loss": 1.6383,
50
  "step": 35
51
  },
52
  {
53
+ "epoch": 0.24,
54
+ "learning_rate": 0.00011867786374724195,
55
+ "loss": 1.688,
56
  "step": 40
57
  },
58
  {
59
+ "epoch": 0.27,
60
+ "learning_rate": 0.00011405305045036307,
61
+ "loss": 1.7355,
62
  "step": 45
63
  },
64
  {
65
+ "epoch": 0.3,
66
+ "learning_rate": 0.00010902640042300463,
67
+ "loss": 1.6672,
68
  "step": 50
69
  },
70
  {
71
+ "epoch": 0.33,
72
+ "learning_rate": 0.00010364235276424244,
73
+ "loss": 1.6835,
74
  "step": 55
75
  },
76
  {
77
+ "epoch": 0.36,
78
+ "learning_rate": 9.794850621797833e-05,
79
+ "loss": 1.5821,
80
  "step": 60
81
  },
82
  {
83
+ "epoch": 0.39,
84
+ "learning_rate": 9.199519836678021e-05,
85
+ "loss": 1.701,
86
  "step": 65
87
  },
88
  {
89
+ "epoch": 0.42,
90
+ "learning_rate": 8.583506061247361e-05,
91
+ "loss": 1.6752,
92
  "step": 70
93
  },
94
  {
95
+ "epoch": 0.45,
96
+ "learning_rate": 7.95225528777695e-05,
97
+ "loss": 1.6845,
98
  "step": 75
99
  },
100
  {
101
+ "epoch": 0.48,
102
+ "learning_rate": 7.311348214248791e-05,
103
+ "loss": 1.6935,
104
  "step": 80
105
  },
106
  {
107
+ "epoch": 0.51,
108
+ "learning_rate": 6.666450907085589e-05,
109
+ "loss": 1.7025,
110
  "step": 85
111
  },
112
  {
113
+ "epoch": 0.54,
114
+ "learning_rate": 6.023264709163982e-05,
115
+ "loss": 1.6483,
116
  "step": 90
117
  },
118
  {
119
+ "epoch": 0.57,
120
+ "learning_rate": 5.387475835959079e-05,
121
+ "loss": 1.5692,
122
  "step": 95
123
  },
124
  {
125
+ "epoch": 0.6,
126
+ "learning_rate": 4.7647051054259744e-05,
127
+ "loss": 1.6434,
128
  "step": 100
129
  },
130
  {
131
+ "epoch": 0.63,
132
+ "learning_rate": 4.1604582460408433e-05,
133
+ "loss": 1.661,
134
  "step": 105
135
  },
136
  {
137
+ "epoch": 0.66,
138
+ "learning_rate": 3.580077222313727e-05,
139
+ "loss": 1.6785,
140
  "step": 110
141
  },
142
  {
143
+ "epoch": 0.69,
144
+ "learning_rate": 3.0286930080889863e-05,
145
+ "loss": 1.6299,
146
  "step": 115
147
  },
148
  {
149
+ "epoch": 0.72,
150
+ "learning_rate": 2.511180225150775e-05,
151
+ "loss": 1.6296,
152
  "step": 120
153
  },
154
  {
155
+ "epoch": 0.75,
156
+ "learning_rate": 2.0321140481598987e-05,
157
+ "loss": 1.6131,
158
  "step": 125
159
  },
160
  {
161
+ "epoch": 0.78,
162
+ "learning_rate": 1.595729756912515e-05,
163
+ "loss": 1.6039,
164
  "step": 130
165
  },
166
  {
167
+ "epoch": 0.81,
168
+ "learning_rate": 1.2058852935068877e-05,
169
+ "loss": 1.696,
170
  "step": 135
171
  },
172
  {
173
+ "epoch": 0.84,
174
+ "learning_rate": 8.660271554391817e-06,
175
+ "loss": 1.6846,
176
  "step": 140
177
  },
178
  {
179
+ "epoch": 0.87,
180
+ "learning_rate": 5.791599261570497e-06,
181
+ "loss": 1.6253,
182
  "step": 145
183
  },
184
  {
185
+ "epoch": 0.9,
186
+ "learning_rate": 3.4781971244218827e-06,
187
+ "loss": 1.5759,
188
  "step": 150
189
  },
190
  {
191
+ "epoch": 0.93,
192
+ "learning_rate": 1.7405172345409156e-06,
193
+ "loss": 1.628,
194
  "step": 155
195
  },
196
  {
197
+ "epoch": 0.96,
198
+ "learning_rate": 5.939218965176846e-07,
199
+ "loss": 1.6359,
200
  "step": 160
201
  },
202
  {
203
+ "epoch": 0.99,
204
+ "learning_rate": 4.8547814429227996e-08,
205
+ "loss": 1.614,
206
  "step": 165
207
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
208
  {
209
  "epoch": 1.0,
210
+ "eval_loss": 1.7224986553192139,
211
+ "eval_runtime": 22.2232,
212
+ "eval_samples_per_second": 22.454,
213
+ "eval_steps_per_second": 2.835,
214
+ "step": 167
215
  }
216
  ],
217
+ "max_steps": 334,
218
+ "num_train_epochs": 2,
219
+ "total_flos": 174412431360000.0,
220
  "trial_name": null,
221
  "trial_params": null
222
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd6418f8aaf396178babd67aee6ff50924acc2cdcb8097797e754c240edd3780
3
- size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce0544ae26ed18e0692923459bb51b32637aeec0f8caa1502a1b6bf0910ecd6d
3
+ size 2863