jane102350 committed on
Commit
cb636b5
1 Parent(s): ba69eaf

Model save

Browse files
README.md CHANGED
@@ -2,8 +2,6 @@
2
  license: cc-by-nc-4.0
3
  library_name: peft
4
  tags:
5
- - text-to-audio
6
- - tiny-kazakh-dombra
7
  - generated_from_trainer
8
  base_model: facebook/musicgen-melody
9
  model-index:
@@ -16,7 +14,7 @@ should probably proofread and complete it, then remove this comment. -->
16
 
17
  # musicgen-melody-lora-kk-colab
18
 
19
- This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on the kk-lib-dataset dataset.
20
 
21
  ## Model description
22
 
@@ -43,7 +41,7 @@ The following hyperparameters were used during training:
43
  - total_train_batch_size: 16
44
  - optimizer: Adam with betas=(0.9,0.99) and epsilon=1e-08
45
  - lr_scheduler_type: linear
46
- - num_epochs: 6
47
  - mixed_precision_training: Native AMP
48
 
49
  ### Training results
 
2
  license: cc-by-nc-4.0
3
  library_name: peft
4
  tags:
 
 
5
  - generated_from_trainer
6
  base_model: facebook/musicgen-melody
7
  model-index:
 
14
 
15
  # musicgen-melody-lora-kk-colab
16
 
17
+ This model is a fine-tuned version of [facebook/musicgen-melody](https://huggingface.co/facebook/musicgen-melody) on an unknown dataset.
18
 
19
  ## Model description
20
 
 
41
  - total_train_batch_size: 16
42
  - optimizer: Adam with betas=(0.9,0.99) and epsilon=1e-08
43
  - lr_scheduler_type: linear
44
+ - num_epochs: 4
45
  - mixed_precision_training: Native AMP
46
 
47
  ### Training results
adapter_config.json CHANGED
@@ -23,22 +23,22 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "lm_heads.1",
27
- "lm_heads.2",
28
- "lm_heads.3",
29
- "embed_tokens.1",
30
- "k_proj",
31
- "embed_tokens.2",
32
- "enc_to_dec_proj",
33
- "fc2",
34
- "embed_tokens.0",
35
  "fc1",
36
- "out_proj",
37
- "embed_tokens.3",
38
  "q_proj",
39
- "v_proj",
40
  "audio_enc_to_dec_proj",
41
- "lm_heads.0"
 
 
 
 
 
 
 
 
 
42
  ],
43
  "task_type": null,
44
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "lm_heads.0",
 
 
 
 
 
 
 
 
27
  "fc1",
28
+ "enc_to_dec_proj",
 
29
  "q_proj",
30
+ "embed_tokens.0",
31
  "audio_enc_to_dec_proj",
32
+ "v_proj",
33
+ "embed_tokens.2",
34
+ "k_proj",
35
+ "out_proj",
36
+ "lm_heads.1",
37
+ "embed_tokens.1",
38
+ "lm_heads.3",
39
+ "lm_heads.2",
40
+ "fc2",
41
+ "embed_tokens.3"
42
  ],
43
  "task_type": null,
44
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:159579bfc15452bfd47b7cbbd633cf984348364c6a1f290e8424c83a407ad9f1
3
  size 87103456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32708f702bacb967741f0a4a97e4676951a27eb1e877c53f6bd51b1e3f258651
3
  size 87103456
trainer_state.json CHANGED
@@ -1,111 +1,167 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.923076923076923,
5
  "eval_steps": 500,
6
- "global_step": 24,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.41025641025641024,
13
- "grad_norm": 1.4702763557434082,
14
- "learning_rate": 0.00018333333333333334,
15
- "loss": 9.483,
16
  "step": 2
17
  },
18
  {
19
- "epoch": 0.8205128205128205,
20
- "grad_norm": 2.237931966781616,
21
- "learning_rate": 0.0001666666666666667,
22
- "loss": 8.8833,
23
  "step": 4
24
  },
25
  {
26
- "epoch": 1.2307692307692308,
27
- "grad_norm": 2.2123219966888428,
28
- "learning_rate": 0.00015000000000000001,
29
- "loss": 8.119,
30
  "step": 6
31
  },
32
  {
33
- "epoch": 1.641025641025641,
34
- "grad_norm": 2.0387604236602783,
35
- "learning_rate": 0.00013333333333333334,
36
- "loss": 7.6202,
37
  "step": 8
38
  },
39
  {
40
- "epoch": 2.051282051282051,
41
- "grad_norm": 1.8973811864852905,
42
- "learning_rate": 0.00011666666666666668,
43
- "loss": 6.9965,
44
  "step": 10
45
  },
46
  {
47
- "epoch": 2.4615384615384617,
48
- "grad_norm": 1.8106874227523804,
49
- "learning_rate": 0.0001,
50
- "loss": 6.8357,
51
  "step": 12
52
  },
53
  {
54
- "epoch": 2.871794871794872,
55
- "grad_norm": 1.7481627464294434,
56
- "learning_rate": 8.333333333333334e-05,
57
- "loss": 6.7018,
58
  "step": 14
59
  },
60
  {
61
- "epoch": 3.282051282051282,
62
- "grad_norm": 0.9489405155181885,
63
- "learning_rate": 6.666666666666667e-05,
64
- "loss": 6.7105,
65
  "step": 16
66
  },
67
  {
68
- "epoch": 3.6923076923076925,
69
- "grad_norm": 1.1118030548095703,
70
- "learning_rate": 5e-05,
71
- "loss": 6.4967,
72
  "step": 18
73
  },
74
  {
75
- "epoch": 4.102564102564102,
76
- "grad_norm": 1.162569522857666,
77
- "learning_rate": 3.3333333333333335e-05,
78
- "loss": 6.6138,
79
  "step": 20
80
  },
81
  {
82
- "epoch": 4.512820512820513,
83
- "grad_norm": 1.045633316040039,
84
- "learning_rate": 1.6666666666666667e-05,
85
- "loss": 6.4841,
86
  "step": 22
87
  },
88
  {
89
- "epoch": 4.923076923076923,
90
- "grad_norm": 0.8710927367210388,
91
- "learning_rate": 0.0,
92
- "loss": 6.5578,
93
  "step": 24
94
  },
95
  {
96
- "epoch": 4.923076923076923,
97
- "step": 24,
98
- "total_flos": 147993926084196.0,
99
- "train_loss": 7.291865587234497,
100
- "train_runtime": 97.928,
101
- "train_samples_per_second": 4.718,
102
- "train_steps_per_second": 0.245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
103
  }
104
  ],
105
  "logging_steps": 2,
106
- "max_steps": 24,
107
  "num_input_tokens_seen": 0,
108
- "num_train_epochs": 6,
109
  "save_steps": 500,
110
  "stateful_callbacks": {
111
  "TrainerControl": {
@@ -119,7 +175,7 @@
119
  "attributes": {}
120
  }
121
  },
122
- "total_flos": 147993926084196.0,
123
  "train_batch_size": 2,
124
  "trial_name": null,
125
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.8095238095238093,
5
  "eval_steps": 500,
6
+ "global_step": 40,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.19047619047619047,
13
+ "grad_norm": 1.0338395833969116,
14
+ "learning_rate": 0.00019,
15
+ "loss": 9.5594,
16
  "step": 2
17
  },
18
  {
19
+ "epoch": 0.38095238095238093,
20
+ "grad_norm": 1.250166654586792,
21
+ "learning_rate": 0.00018,
22
+ "loss": 9.2309,
23
  "step": 4
24
  },
25
  {
26
+ "epoch": 0.5714285714285714,
27
+ "grad_norm": 1.9235066175460815,
28
+ "learning_rate": 0.00017,
29
+ "loss": 8.8061,
30
  "step": 6
31
  },
32
  {
33
+ "epoch": 0.7619047619047619,
34
+ "grad_norm": 2.2285144329071045,
35
+ "learning_rate": 0.00016,
36
+ "loss": 8.2859,
37
  "step": 8
38
  },
39
  {
40
+ "epoch": 0.9523809523809523,
41
+ "grad_norm": 1.434334397315979,
42
+ "learning_rate": 0.00015000000000000001,
43
+ "loss": 7.9618,
44
  "step": 10
45
  },
46
  {
47
+ "epoch": 1.1428571428571428,
48
+ "grad_norm": 1.1094856262207031,
49
+ "learning_rate": 0.00014,
50
+ "loss": 7.7526,
51
  "step": 12
52
  },
53
  {
54
+ "epoch": 1.3333333333333333,
55
+ "grad_norm": 0.9047002196311951,
56
+ "learning_rate": 0.00013000000000000002,
57
+ "loss": 7.5692,
58
  "step": 14
59
  },
60
  {
61
+ "epoch": 1.5238095238095237,
62
+ "grad_norm": 1.0015796422958374,
63
+ "learning_rate": 0.00012,
64
+ "loss": 7.4736,
65
  "step": 16
66
  },
67
  {
68
+ "epoch": 1.7142857142857144,
69
+ "grad_norm": 1.095115065574646,
70
+ "learning_rate": 0.00011000000000000002,
71
+ "loss": 7.487,
72
  "step": 18
73
  },
74
  {
75
+ "epoch": 1.9047619047619047,
76
+ "grad_norm": 0.8204529285430908,
77
+ "learning_rate": 0.0001,
78
+ "loss": 7.4386,
79
  "step": 20
80
  },
81
  {
82
+ "epoch": 2.0952380952380953,
83
+ "grad_norm": 0.8284938931465149,
84
+ "learning_rate": 9e-05,
85
+ "loss": 7.446,
86
  "step": 22
87
  },
88
  {
89
+ "epoch": 2.2857142857142856,
90
+ "grad_norm": 0.8024454712867737,
91
+ "learning_rate": 8e-05,
92
+ "loss": 7.3784,
93
  "step": 24
94
  },
95
  {
96
+ "epoch": 2.4761904761904763,
97
+ "grad_norm": 1.7449926137924194,
98
+ "learning_rate": 7e-05,
99
+ "loss": 7.2682,
100
+ "step": 26
101
+ },
102
+ {
103
+ "epoch": 2.6666666666666665,
104
+ "grad_norm": 0.6010074019432068,
105
+ "learning_rate": 6e-05,
106
+ "loss": 7.317,
107
+ "step": 28
108
+ },
109
+ {
110
+ "epoch": 2.857142857142857,
111
+ "grad_norm": 0.7939924597740173,
112
+ "learning_rate": 5e-05,
113
+ "loss": 7.3338,
114
+ "step": 30
115
+ },
116
+ {
117
+ "epoch": 3.0476190476190474,
118
+ "grad_norm": 0.5959632992744446,
119
+ "learning_rate": 4e-05,
120
+ "loss": 7.2549,
121
+ "step": 32
122
+ },
123
+ {
124
+ "epoch": 3.238095238095238,
125
+ "grad_norm": 0.780635416507721,
126
+ "learning_rate": 3e-05,
127
+ "loss": 7.2918,
128
+ "step": 34
129
+ },
130
+ {
131
+ "epoch": 3.4285714285714284,
132
+ "grad_norm": 0.7039668560028076,
133
+ "learning_rate": 2e-05,
134
+ "loss": 7.3645,
135
+ "step": 36
136
+ },
137
+ {
138
+ "epoch": 3.619047619047619,
139
+ "grad_norm": 0.5156592726707458,
140
+ "learning_rate": 1e-05,
141
+ "loss": 7.2357,
142
+ "step": 38
143
+ },
144
+ {
145
+ "epoch": 3.8095238095238093,
146
+ "grad_norm": 0.4115481376647949,
147
+ "learning_rate": 0.0,
148
+ "loss": 7.2353,
149
+ "step": 40
150
+ },
151
+ {
152
+ "epoch": 3.8095238095238093,
153
+ "step": 40,
154
+ "total_flos": 200497171101768.0,
155
+ "train_loss": 7.734543776512146,
156
+ "train_runtime": 169.1152,
157
+ "train_samples_per_second": 3.974,
158
+ "train_steps_per_second": 0.237
159
  }
160
  ],
161
  "logging_steps": 2,
162
+ "max_steps": 40,
163
  "num_input_tokens_seen": 0,
164
+ "num_train_epochs": 4,
165
  "save_steps": 500,
166
  "stateful_callbacks": {
167
  "TrainerControl": {
 
175
  "attributes": {}
176
  }
177
  },
178
+ "total_flos": 200497171101768.0,
179
  "train_batch_size": 2,
180
  "trial_name": null,
181
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:49be13def91f22efe68e65d1225930bebbfe19045c18e418191043383c5f11a1
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:091fda1468daebf50659615f89bc02221b34bb65e1f9d2cc74d4a9107eed96a0
3
  size 5304