jane102350 committed on
Commit
851dcb6
1 Parent(s): 71c35f5

End of training

Browse files
Files changed (2) hide show
  1. README.md +3 -1
  2. trainer_state.json +282 -72
README.md CHANGED
@@ -2,6 +2,8 @@
2
  license: cc-by-nc-4.0
3
  library_name: peft
4
  tags:
 
 
5
  - generated_from_trainer
6
  base_model: facebook/musicgen-melody
7
  model-index:
@@ -14,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
14
 
15
  # musicgen-melody-lora-kk-colab
16
 
17
- This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on an unknown dataset.
18
 
19
  ## Model description
20
 
 
2
  license: cc-by-nc-4.0
3
  library_name: peft
4
  tags:
5
+ - text-to-audio
6
+ - tiny-kk
7
  - generated_from_trainer
8
  base_model: facebook/musicgen-melody
9
  model-index:
 
16
 
17
  # musicgen-melody-lora-kk-colab
18
 
19
+ This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on the kk/tiny-kk dataset.
20
 
21
  ## Model description
22
 
trainer_state.json CHANGED
@@ -1,167 +1,377 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.8095238095238093,
5
  "eval_steps": 500,
6
- "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.19047619047619047,
13
- "grad_norm": 1.1883195638656616,
14
- "learning_rate": 0.00019,
15
- "loss": 9.559,
16
  "step": 2
17
  },
18
  {
19
  "epoch": 0.38095238095238093,
20
- "grad_norm": 1.2496156692504883,
21
- "learning_rate": 0.00018,
22
- "loss": 9.2294,
23
  "step": 4
24
  },
25
  {
26
  "epoch": 0.5714285714285714,
27
- "grad_norm": 1.8853394985198975,
28
- "learning_rate": 0.00017,
29
- "loss": 8.807,
30
  "step": 6
31
  },
32
  {
33
  "epoch": 0.7619047619047619,
34
- "grad_norm": 2.196770429611206,
35
- "learning_rate": 0.00016,
36
- "loss": 8.2892,
37
  "step": 8
38
  },
39
  {
40
  "epoch": 0.9523809523809523,
41
- "grad_norm": 1.4324467182159424,
42
- "learning_rate": 0.00015000000000000001,
43
- "loss": 7.969,
44
  "step": 10
45
  },
46
  {
47
  "epoch": 1.1428571428571428,
48
- "grad_norm": 1.119658350944519,
49
- "learning_rate": 0.00014,
50
- "loss": 7.756,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 1.3333333333333333,
55
- "grad_norm": 0.9036475419998169,
56
- "learning_rate": 0.00013000000000000002,
57
- "loss": 7.5698,
58
  "step": 14
59
  },
60
  {
61
  "epoch": 1.5238095238095237,
62
- "grad_norm": 0.991559624671936,
63
- "learning_rate": 0.00012,
64
- "loss": 7.4717,
65
  "step": 16
66
  },
67
  {
68
  "epoch": 1.7142857142857144,
69
- "grad_norm": 1.0949680805206299,
70
- "learning_rate": 0.00011000000000000002,
71
- "loss": 7.4879,
72
  "step": 18
73
  },
74
  {
75
  "epoch": 1.9047619047619047,
76
- "grad_norm": 0.8211305737495422,
77
- "learning_rate": 0.0001,
78
- "loss": 7.4386,
79
  "step": 20
80
  },
81
  {
82
  "epoch": 2.0952380952380953,
83
- "grad_norm": 0.8081773519515991,
84
- "learning_rate": 9e-05,
85
- "loss": 7.4447,
86
  "step": 22
87
  },
88
  {
89
  "epoch": 2.2857142857142856,
90
- "grad_norm": 0.8093952536582947,
91
- "learning_rate": 8e-05,
92
- "loss": 7.3775,
93
  "step": 24
94
  },
95
  {
96
  "epoch": 2.4761904761904763,
97
- "grad_norm": 1.7485935688018799,
98
- "learning_rate": 7e-05,
99
- "loss": 7.2698,
100
  "step": 26
101
  },
102
  {
103
  "epoch": 2.6666666666666665,
104
- "grad_norm": 0.5961379408836365,
105
- "learning_rate": 6e-05,
106
- "loss": 7.3169,
107
  "step": 28
108
  },
109
  {
110
  "epoch": 2.857142857142857,
111
- "grad_norm": 0.7973180413246155,
112
- "learning_rate": 5e-05,
113
- "loss": 7.3327,
114
  "step": 30
115
  },
116
  {
117
  "epoch": 3.0476190476190474,
118
- "grad_norm": 0.5895470976829529,
119
- "learning_rate": 4e-05,
120
- "loss": 7.2551,
121
  "step": 32
122
  },
123
  {
124
  "epoch": 3.238095238095238,
125
- "grad_norm": 0.7729610204696655,
126
- "learning_rate": 3e-05,
127
- "loss": 7.2926,
128
  "step": 34
129
  },
130
  {
131
  "epoch": 3.4285714285714284,
132
- "grad_norm": 0.707767903804779,
133
- "learning_rate": 2e-05,
134
- "loss": 7.3648,
135
  "step": 36
136
  },
137
  {
138
  "epoch": 3.619047619047619,
139
- "grad_norm": 0.5094689130783081,
140
- "learning_rate": 1e-05,
141
- "loss": 7.2383,
142
  "step": 38
143
  },
144
  {
145
  "epoch": 3.8095238095238093,
146
- "grad_norm": 0.4048698842525482,
147
- "learning_rate": 0.0,
148
- "loss": 7.2355,
149
  "step": 40
150
  },
151
  {
152
- "epoch": 3.8095238095238093,
153
- "step": 40,
154
- "total_flos": 200497171101768.0,
155
- "train_loss": 7.735274028778076,
156
- "train_runtime": 169.6422,
157
- "train_samples_per_second": 3.961,
158
- "train_steps_per_second": 0.236
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
159
  }
160
  ],
161
  "logging_steps": 2,
162
- "max_steps": 40,
163
  "num_input_tokens_seen": 0,
164
- "num_train_epochs": 4,
165
  "save_steps": 500,
166
  "stateful_callbacks": {
167
  "TrainerControl": {
@@ -175,7 +385,7 @@
175
  "attributes": {}
176
  }
177
  },
178
- "total_flos": 200497171101768.0,
179
  "train_batch_size": 2,
180
  "trial_name": null,
181
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.523809523809524,
5
  "eval_steps": 500,
6
+ "global_step": 100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.19047619047619047,
13
+ "grad_norm": 1.0693126916885376,
14
+ "learning_rate": 0.000196,
15
+ "loss": 9.5582,
16
  "step": 2
17
  },
18
  {
19
  "epoch": 0.38095238095238093,
20
+ "grad_norm": 1.2914847135543823,
21
+ "learning_rate": 0.000192,
22
+ "loss": 9.2227,
23
  "step": 4
24
  },
25
  {
26
  "epoch": 0.5714285714285714,
27
+ "grad_norm": 1.9945707321166992,
28
+ "learning_rate": 0.000188,
29
+ "loss": 8.7759,
30
  "step": 6
31
  },
32
  {
33
  "epoch": 0.7619047619047619,
34
+ "grad_norm": 2.011664628982544,
35
+ "learning_rate": 0.00018400000000000003,
36
+ "loss": 8.2142,
37
  "step": 8
38
  },
39
  {
40
  "epoch": 0.9523809523809523,
41
+ "grad_norm": 1.3491569757461548,
42
+ "learning_rate": 0.00018,
43
+ "loss": 7.9009,
44
  "step": 10
45
  },
46
  {
47
  "epoch": 1.1428571428571428,
48
+ "grad_norm": 0.9671052098274231,
49
+ "learning_rate": 0.00017600000000000002,
50
+ "loss": 7.6909,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 1.3333333333333333,
55
+ "grad_norm": 1.116225004196167,
56
+ "learning_rate": 0.000172,
57
+ "loss": 7.5216,
58
  "step": 14
59
  },
60
  {
61
  "epoch": 1.5238095238095237,
62
+ "grad_norm": 1.0181453227996826,
63
+ "learning_rate": 0.000168,
64
+ "loss": 7.4308,
65
  "step": 16
66
  },
67
  {
68
  "epoch": 1.7142857142857144,
69
+ "grad_norm": 0.9790288209915161,
70
+ "learning_rate": 0.000164,
71
+ "loss": 7.4444,
72
  "step": 18
73
  },
74
  {
75
  "epoch": 1.9047619047619047,
76
+ "grad_norm": 0.9529135823249817,
77
+ "learning_rate": 0.00016,
78
+ "loss": 7.3866,
79
  "step": 20
80
  },
81
  {
82
  "epoch": 2.0952380952380953,
83
+ "grad_norm": 0.7934174537658691,
84
+ "learning_rate": 0.00015600000000000002,
85
+ "loss": 7.3822,
86
  "step": 22
87
  },
88
  {
89
  "epoch": 2.2857142857142856,
90
+ "grad_norm": 0.7163369059562683,
91
+ "learning_rate": 0.000152,
92
+ "loss": 7.3191,
93
  "step": 24
94
  },
95
  {
96
  "epoch": 2.4761904761904763,
97
+ "grad_norm": 1.347898244857788,
98
+ "learning_rate": 0.000148,
99
+ "loss": 7.2142,
100
  "step": 26
101
  },
102
  {
103
  "epoch": 2.6666666666666665,
104
+ "grad_norm": 0.7439594864845276,
105
+ "learning_rate": 0.000144,
106
+ "loss": 7.2718,
107
  "step": 28
108
  },
109
  {
110
  "epoch": 2.857142857142857,
111
+ "grad_norm": 1.4523087739944458,
112
+ "learning_rate": 0.00014,
113
+ "loss": 7.2901,
114
  "step": 30
115
  },
116
  {
117
  "epoch": 3.0476190476190474,
118
+ "grad_norm": 0.7053799033164978,
119
+ "learning_rate": 0.00013600000000000003,
120
+ "loss": 7.2125,
121
  "step": 32
122
  },
123
  {
124
  "epoch": 3.238095238095238,
125
+ "grad_norm": 0.6156577467918396,
126
+ "learning_rate": 0.000132,
127
+ "loss": 7.229,
128
  "step": 34
129
  },
130
  {
131
  "epoch": 3.4285714285714284,
132
+ "grad_norm": 0.40743571519851685,
133
+ "learning_rate": 0.00012800000000000002,
134
+ "loss": 7.2999,
135
  "step": 36
136
  },
137
  {
138
  "epoch": 3.619047619047619,
139
+ "grad_norm": 1.0756566524505615,
140
+ "learning_rate": 0.000124,
141
+ "loss": 7.212,
142
  "step": 38
143
  },
144
  {
145
  "epoch": 3.8095238095238093,
146
+ "grad_norm": 0.7201813459396362,
147
+ "learning_rate": 0.00012,
148
+ "loss": 7.1826,
149
  "step": 40
150
  },
151
  {
152
+ "epoch": 4.0,
153
+ "grad_norm": 0.5773327946662903,
154
+ "learning_rate": 0.000116,
155
+ "loss": 7.1456,
156
+ "step": 42
157
+ },
158
+ {
159
+ "epoch": 4.190476190476191,
160
+ "grad_norm": 0.6004664301872253,
161
+ "learning_rate": 0.00011200000000000001,
162
+ "loss": 7.0349,
163
+ "step": 44
164
+ },
165
+ {
166
+ "epoch": 4.380952380952381,
167
+ "grad_norm": 1.4157112836837769,
168
+ "learning_rate": 0.00010800000000000001,
169
+ "loss": 7.2767,
170
+ "step": 46
171
+ },
172
+ {
173
+ "epoch": 4.571428571428571,
174
+ "grad_norm": 0.852541446685791,
175
+ "learning_rate": 0.00010400000000000001,
176
+ "loss": 7.1745,
177
+ "step": 48
178
+ },
179
+ {
180
+ "epoch": 4.761904761904762,
181
+ "grad_norm": 0.7835370898246765,
182
+ "learning_rate": 0.0001,
183
+ "loss": 7.2477,
184
+ "step": 50
185
+ },
186
+ {
187
+ "epoch": 4.9523809523809526,
188
+ "grad_norm": 0.414756178855896,
189
+ "learning_rate": 9.6e-05,
190
+ "loss": 7.1781,
191
+ "step": 52
192
+ },
193
+ {
194
+ "epoch": 5.142857142857143,
195
+ "grad_norm": 0.7038145661354065,
196
+ "learning_rate": 9.200000000000001e-05,
197
+ "loss": 7.1103,
198
+ "step": 54
199
+ },
200
+ {
201
+ "epoch": 5.333333333333333,
202
+ "grad_norm": 0.8368222713470459,
203
+ "learning_rate": 8.800000000000001e-05,
204
+ "loss": 7.1595,
205
+ "step": 56
206
+ },
207
+ {
208
+ "epoch": 5.523809523809524,
209
+ "grad_norm": 0.6943209171295166,
210
+ "learning_rate": 8.4e-05,
211
+ "loss": 7.1062,
212
+ "step": 58
213
+ },
214
+ {
215
+ "epoch": 5.714285714285714,
216
+ "grad_norm": 0.4186341464519501,
217
+ "learning_rate": 8e-05,
218
+ "loss": 7.2348,
219
+ "step": 60
220
+ },
221
+ {
222
+ "epoch": 5.904761904761905,
223
+ "grad_norm": 1.0224595069885254,
224
+ "learning_rate": 7.6e-05,
225
+ "loss": 7.1154,
226
+ "step": 62
227
+ },
228
+ {
229
+ "epoch": 6.095238095238095,
230
+ "grad_norm": 0.428688645362854,
231
+ "learning_rate": 7.2e-05,
232
+ "loss": 7.1194,
233
+ "step": 64
234
+ },
235
+ {
236
+ "epoch": 6.285714285714286,
237
+ "grad_norm": 0.913233757019043,
238
+ "learning_rate": 6.800000000000001e-05,
239
+ "loss": 7.1919,
240
+ "step": 66
241
+ },
242
+ {
243
+ "epoch": 6.476190476190476,
244
+ "grad_norm": 0.5481642484664917,
245
+ "learning_rate": 6.400000000000001e-05,
246
+ "loss": 7.126,
247
+ "step": 68
248
+ },
249
+ {
250
+ "epoch": 6.666666666666667,
251
+ "grad_norm": 0.49522772431373596,
252
+ "learning_rate": 6e-05,
253
+ "loss": 7.1564,
254
+ "step": 70
255
+ },
256
+ {
257
+ "epoch": 6.857142857142857,
258
+ "grad_norm": 0.40602990984916687,
259
+ "learning_rate": 5.6000000000000006e-05,
260
+ "loss": 7.081,
261
+ "step": 72
262
+ },
263
+ {
264
+ "epoch": 7.0476190476190474,
265
+ "grad_norm": 0.4593268036842346,
266
+ "learning_rate": 5.2000000000000004e-05,
267
+ "loss": 7.0596,
268
+ "step": 74
269
+ },
270
+ {
271
+ "epoch": 7.238095238095238,
272
+ "grad_norm": 0.44626158475875854,
273
+ "learning_rate": 4.8e-05,
274
+ "loss": 7.0674,
275
+ "step": 76
276
+ },
277
+ {
278
+ "epoch": 7.428571428571429,
279
+ "grad_norm": 0.6573432087898254,
280
+ "learning_rate": 4.4000000000000006e-05,
281
+ "loss": 7.0745,
282
+ "step": 78
283
+ },
284
+ {
285
+ "epoch": 7.619047619047619,
286
+ "grad_norm": 0.3820817172527313,
287
+ "learning_rate": 4e-05,
288
+ "loss": 7.0785,
289
+ "step": 80
290
+ },
291
+ {
292
+ "epoch": 7.809523809523809,
293
+ "grad_norm": 0.8610634803771973,
294
+ "learning_rate": 3.6e-05,
295
+ "loss": 7.1974,
296
+ "step": 82
297
+ },
298
+ {
299
+ "epoch": 8.0,
300
+ "grad_norm": 0.44188380241394043,
301
+ "learning_rate": 3.2000000000000005e-05,
302
+ "loss": 7.0847,
303
+ "step": 84
304
+ },
305
+ {
306
+ "epoch": 8.19047619047619,
307
+ "grad_norm": 0.6792606711387634,
308
+ "learning_rate": 2.8000000000000003e-05,
309
+ "loss": 7.1191,
310
+ "step": 86
311
+ },
312
+ {
313
+ "epoch": 8.380952380952381,
314
+ "grad_norm": 0.4903930723667145,
315
+ "learning_rate": 2.4e-05,
316
+ "loss": 7.1288,
317
+ "step": 88
318
+ },
319
+ {
320
+ "epoch": 8.571428571428571,
321
+ "grad_norm": 0.5853165984153748,
322
+ "learning_rate": 2e-05,
323
+ "loss": 7.0479,
324
+ "step": 90
325
+ },
326
+ {
327
+ "epoch": 8.761904761904763,
328
+ "grad_norm": 0.6836739182472229,
329
+ "learning_rate": 1.6000000000000003e-05,
330
+ "loss": 7.0448,
331
+ "step": 92
332
+ },
333
+ {
334
+ "epoch": 8.952380952380953,
335
+ "grad_norm": 0.5737291574478149,
336
+ "learning_rate": 1.2e-05,
337
+ "loss": 7.0717,
338
+ "step": 94
339
+ },
340
+ {
341
+ "epoch": 9.142857142857142,
342
+ "grad_norm": 1.709892988204956,
343
+ "learning_rate": 8.000000000000001e-06,
344
+ "loss": 6.9482,
345
+ "step": 96
346
+ },
347
+ {
348
+ "epoch": 9.333333333333334,
349
+ "grad_norm": 0.61203932762146,
350
+ "learning_rate": 4.000000000000001e-06,
351
+ "loss": 7.1598,
352
+ "step": 98
353
+ },
354
+ {
355
+ "epoch": 9.523809523809524,
356
+ "grad_norm": 0.3827505111694336,
357
+ "learning_rate": 0.0,
358
+ "loss": 7.1112,
359
+ "step": 100
360
+ },
361
+ {
362
+ "epoch": 9.523809523809524,
363
+ "step": 100,
364
+ "total_flos": 500328301455504.0,
365
+ "train_loss": 7.34823148727417,
366
+ "train_runtime": 426.3789,
367
+ "train_samples_per_second": 3.94,
368
+ "train_steps_per_second": 0.235
369
  }
370
  ],
371
  "logging_steps": 2,
372
+ "max_steps": 100,
373
  "num_input_tokens_seen": 0,
374
+ "num_train_epochs": 10,
375
  "save_steps": 500,
376
  "stateful_callbacks": {
377
  "TrainerControl": {
 
385
  "attributes": {}
386
  }
387
  },
388
+ "total_flos": 500328301455504.0,
389
  "train_batch_size": 2,
390
  "trial_name": null,
391
  "trial_params": null