AlekseyKorshuk committed on
Commit
72f33e4
1 Parent(s): 1b6f2d8

huggingartists

Browse files
README.md CHANGED
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/morgenshtern")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/2g9p829k/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3cvafvz3) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3cvafvz3/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
 
45
  dataset = load_dataset("huggingartists/morgenshtern")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11daksqo/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on MORGENSHTERN's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/3hprs98u) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/3hprs98u/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 1.0789222717285156, "eval_runtime": 7.246, "eval_samples_per_second": 20.701, "eval_steps_per_second": 2.622, "epoch": 16.0}
 
1
+ {"eval_loss": 0.9612834453582764, "eval_runtime": 6.7467, "eval_samples_per_second": 22.53, "eval_steps_per_second": 2.816, "epoch": 2.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9654bfbbbc16c001bf3ca7f1bde7b170d24b36f207e71c7d64f5e319546120e9
3
  size 497764120
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2c263070fbf5f7efe3e75dd578e814f49ec64bc199e43a3f45edb236d61fb34
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d310145beb326e0a9d566eb5622927be8d76bfb9430b33c32dd51b5c7a06790
3
  size 995604017
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:51f82cb2b14e8b08c2a0932e2579488a4a9bec1b0a08c7c5585837660c35aff9
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ec149b6dfd697bf23788055aefe01a7ddb91b8831442e474a5c68c182988271b
3
  size 510403817
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65bad4addbe634edd37474cda521dc103ca5eb49f77dbe70541dd72d3250de98
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:160d145d45171c3680d1a0f735e7dba3ead27f25402db68bfdfe08b789e7823a
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:862a8301ff1a624ea0fb015628a4060abb8cdf66d8bd2a773af256e1cef63c1e
3
  size 14567
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c82347c45c6ff6d74c3b0b79bef318683f5527c56da1c685e3b3ba8d34edddd5
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c6d0bf6916b2b0bc8b9baa1851e61696a7abf151886205b1aec182b9abcf338
3
  size 623
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 1.0789222717285156,
3
- "best_model_checkpoint": "output/morgenshtern/checkpoint-97",
4
- "epoch": 1.0,
5
- "global_step": 97,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -128,11 +128,133 @@
128
  "eval_samples_per_second": 21.035,
129
  "eval_steps_per_second": 2.664,
130
  "step": 97
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
131
  }
132
  ],
133
- "max_steps": 1552,
134
- "num_train_epochs": 16,
135
- "total_flos": 101381308416000.0,
136
  "trial_name": null,
137
  "trial_params": null
138
  }
 
1
  {
2
+ "best_metric": 0.9612834453582764,
3
+ "best_model_checkpoint": "output/morgenshtern/checkpoint-194",
4
+ "epoch": 2.0,
5
+ "global_step": 194,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
128
  "eval_samples_per_second": 21.035,
129
  "eval_steps_per_second": 2.664,
130
  "step": 97
131
+ },
132
+ {
133
+ "epoch": 1.03,
134
+ "learning_rate": 3.2355730797025283e-07,
135
+ "loss": 1.094,
136
+ "step": 100
137
+ },
138
+ {
139
+ "epoch": 1.08,
140
+ "learning_rate": 2.289809668554777e-06,
141
+ "loss": 1.173,
142
+ "step": 105
143
+ },
144
+ {
145
+ "epoch": 1.13,
146
+ "learning_rate": 5.991173097174171e-06,
147
+ "loss": 1.1401,
148
+ "step": 110
149
+ },
150
+ {
151
+ "epoch": 1.19,
152
+ "learning_rate": 1.1330795562191737e-05,
153
+ "loss": 1.1561,
154
+ "step": 115
155
+ },
156
+ {
157
+ "epoch": 1.24,
158
+ "learning_rate": 1.816895739168569e-05,
159
+ "loss": 1.103,
160
+ "step": 120
161
+ },
162
+ {
163
+ "epoch": 1.29,
164
+ "learning_rate": 2.6326727259776298e-05,
165
+ "loss": 1.0156,
166
+ "step": 125
167
+ },
168
+ {
169
+ "epoch": 1.34,
170
+ "learning_rate": 3.559064420793063e-05,
171
+ "loss": 1.047,
172
+ "step": 130
173
+ },
174
+ {
175
+ "epoch": 1.39,
176
+ "learning_rate": 4.571830318875349e-05,
177
+ "loss": 1.0605,
178
+ "step": 135
179
+ },
180
+ {
181
+ "epoch": 1.44,
182
+ "learning_rate": 5.644469797766504e-05,
183
+ "loss": 1.0732,
184
+ "step": 140
185
+ },
186
+ {
187
+ "epoch": 1.49,
188
+ "learning_rate": 6.748915548014806e-05,
189
+ "loss": 1.0798,
190
+ "step": 145
191
+ },
192
+ {
193
+ "epoch": 1.55,
194
+ "learning_rate": 7.856267998744024e-05,
195
+ "loss": 1.132,
196
+ "step": 150
197
+ },
198
+ {
199
+ "epoch": 1.6,
200
+ "learning_rate": 8.937551520659945e-05,
201
+ "loss": 1.097,
202
+ "step": 155
203
+ },
204
+ {
205
+ "epoch": 1.65,
206
+ "learning_rate": 9.964472619245533e-05,
207
+ "loss": 1.1685,
208
+ "step": 160
209
+ },
210
+ {
211
+ "epoch": 1.7,
212
+ "learning_rate": 0.00010910160278820622,
213
+ "loss": 1.0965,
214
+ "step": 165
215
+ },
216
+ {
217
+ "epoch": 1.75,
218
+ "learning_rate": 0.00011749869085191239,
219
+ "loss": 1.0409,
220
+ "step": 170
221
+ },
222
+ {
223
+ "epoch": 1.8,
224
+ "learning_rate": 0.00012461626728572453,
225
+ "loss": 1.2079,
226
+ "step": 175
227
+ },
228
+ {
229
+ "epoch": 1.86,
230
+ "learning_rate": 0.000130268089438458,
231
+ "loss": 1.109,
232
+ "step": 180
233
+ },
234
+ {
235
+ "epoch": 1.91,
236
+ "learning_rate": 0.00013430626843929596,
237
+ "loss": 1.1506,
238
+ "step": 185
239
+ },
240
+ {
241
+ "epoch": 1.96,
242
+ "learning_rate": 0.00013662513894413276,
243
+ "loss": 1.074,
244
+ "step": 190
245
+ },
246
+ {
247
+ "epoch": 2.0,
248
+ "eval_loss": 0.9612834453582764,
249
+ "eval_runtime": 6.602,
250
+ "eval_samples_per_second": 23.023,
251
+ "eval_steps_per_second": 2.878,
252
+ "step": 194
253
  }
254
  ],
255
+ "max_steps": 194,
256
+ "num_train_epochs": 2,
257
+ "total_flos": 202501324800000.0,
258
  "trial_name": null,
259
  "trial_params": null
260
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5497b3aaf0bdb0eca71885ab53b83d8d3468893d10b8e506f44d16425afcbf44
3
  size 2671
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4e57e184be42952f12edd861103c10f3a3cdd1a9bd01f214495daf9092e23e9
3
  size 2671