adamjweintraut commited on
Commit
c5a324e
1 Parent(s): d086ddf

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -70,5 +70,5 @@
70
  "torch_dtype": "float32",
71
  "transformers_version": "4.40.0.dev0",
72
  "use_cache": true,
73
- "vocab_size": 50286
74
  }
 
70
  "torch_dtype": "float32",
71
  "transformers_version": "4.40.0.dev0",
72
  "use_cache": true,
73
+ "vocab_size": 50289
74
  }
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df31b4a5e56132ea8ae35ed1bd4dea2f3c52f182a000a06969a4066c5c7514f6
3
- size 1625513096
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01bcdb51423150a134e6e9e538e83ac0241e437ee06e4f207a1520097481f789
3
+ size 1625525396
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fa96591d7d61f01c30102e1a9e61609755d2a62609e1b717c50fa7703c6f1089
3
- size 3250931983
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:057757861d66f1ae8c1ae01875cb7429fe6c6dda10b3e6c3ad5f56232bde4e85
3
+ size 3250956559
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2359fee00ba5d131fe31cd30a6732127d6476bef7d18d093ccdc1436d02d130b
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:333ba09bff458e71dc26fed46a13a420cd849a9ad71efe06b014f5627bab7dcc
3
+ size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3639a21adec6316397ddf46df39adb14bf2c1acdddc7615711b1520a3e25eb1e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:441fe55200e88247d0cb84c3a5d41f73058895b59e0666b89da76a37ed964576
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,200 +1,35 @@
1
  {
2
- "best_metric": 2.896005868911743,
3
- "best_model_checkpoint": "/content/drive/MyDrive/W210 Capstone - Lyric Generation with Melody/loaf/models/lyrlen/bart/bart-finetuned-lyrlen-128-tokens/checkpoint-4500",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 6000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.33,
13
- "grad_norm": 2.3970043659210205,
14
- "learning_rate": 4.5833333333333334e-05,
15
- "loss": 3.2981,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.33,
20
- "eval_loss": 3.0587222576141357,
21
- "eval_runtime": 130.1009,
22
- "eval_samples_per_second": 23.059,
23
- "eval_steps_per_second": 1.445,
24
  "step": 500
25
- },
26
- {
27
- "epoch": 0.67,
28
- "grad_norm": 2.09039044380188,
29
- "learning_rate": 4.166666666666667e-05,
30
- "loss": 3.0335,
31
- "step": 1000
32
- },
33
- {
34
- "epoch": 0.67,
35
- "eval_loss": 2.976158857345581,
36
- "eval_runtime": 130.8207,
37
- "eval_samples_per_second": 22.932,
38
- "eval_steps_per_second": 1.437,
39
- "step": 1000
40
- },
41
- {
42
- "epoch": 1.0,
43
- "grad_norm": 3.244218587875366,
44
- "learning_rate": 3.7500000000000003e-05,
45
- "loss": 2.9525,
46
- "step": 1500
47
- },
48
- {
49
- "epoch": 1.0,
50
- "eval_loss": 2.9431517124176025,
51
- "eval_runtime": 129.2605,
52
- "eval_samples_per_second": 23.209,
53
- "eval_steps_per_second": 1.454,
54
- "step": 1500
55
- },
56
- {
57
- "epoch": 1.33,
58
- "grad_norm": 2.3262383937835693,
59
- "learning_rate": 3.3333333333333335e-05,
60
- "loss": 2.9021,
61
- "step": 2000
62
- },
63
- {
64
- "epoch": 1.33,
65
- "eval_loss": 2.956195592880249,
66
- "eval_runtime": 129.8737,
67
- "eval_samples_per_second": 23.099,
68
- "eval_steps_per_second": 1.448,
69
- "step": 2000
70
- },
71
- {
72
- "epoch": 1.67,
73
- "grad_norm": 2.195279598236084,
74
- "learning_rate": 2.916666666666667e-05,
75
- "loss": 2.8671,
76
- "step": 2500
77
- },
78
- {
79
- "epoch": 1.67,
80
- "eval_loss": 2.9502499103546143,
81
- "eval_runtime": 129.9739,
82
- "eval_samples_per_second": 23.082,
83
- "eval_steps_per_second": 1.446,
84
- "step": 2500
85
- },
86
- {
87
- "epoch": 2.0,
88
- "grad_norm": 2.360499382019043,
89
- "learning_rate": 2.5e-05,
90
- "loss": 2.8037,
91
- "step": 3000
92
- },
93
- {
94
- "epoch": 2.0,
95
- "eval_loss": 2.913353204727173,
96
- "eval_runtime": 130.2228,
97
- "eval_samples_per_second": 23.037,
98
- "eval_steps_per_second": 1.444,
99
- "step": 3000
100
- },
101
- {
102
- "epoch": 2.33,
103
- "grad_norm": 2.184467077255249,
104
- "learning_rate": 2.0833333333333336e-05,
105
- "loss": 2.7389,
106
- "step": 3500
107
- },
108
- {
109
- "epoch": 2.33,
110
- "eval_loss": 2.9057974815368652,
111
- "eval_runtime": 129.5421,
112
- "eval_samples_per_second": 23.158,
113
- "eval_steps_per_second": 1.451,
114
- "step": 3500
115
- },
116
- {
117
- "epoch": 2.67,
118
- "grad_norm": 2.1960461139678955,
119
- "learning_rate": 1.6666666666666667e-05,
120
- "loss": 2.7305,
121
- "step": 4000
122
- },
123
- {
124
- "epoch": 2.67,
125
- "eval_loss": 2.9016354084014893,
126
- "eval_runtime": 133.3947,
127
- "eval_samples_per_second": 22.49,
128
- "eval_steps_per_second": 1.409,
129
- "step": 4000
130
- },
131
- {
132
- "epoch": 3.0,
133
- "grad_norm": 2.1077606678009033,
134
- "learning_rate": 1.25e-05,
135
- "loss": 2.7196,
136
- "step": 4500
137
- },
138
- {
139
- "epoch": 3.0,
140
- "eval_loss": 2.896005868911743,
141
- "eval_runtime": 133.0227,
142
- "eval_samples_per_second": 22.553,
143
- "eval_steps_per_second": 1.413,
144
- "step": 4500
145
- },
146
- {
147
- "epoch": 3.33,
148
- "grad_norm": 2.1256420612335205,
149
- "learning_rate": 8.333333333333334e-06,
150
- "loss": 2.7217,
151
- "step": 5000
152
- },
153
- {
154
- "epoch": 3.33,
155
- "eval_loss": 2.9097442626953125,
156
- "eval_runtime": 135.8274,
157
- "eval_samples_per_second": 22.087,
158
- "eval_steps_per_second": 1.384,
159
- "step": 5000
160
- },
161
- {
162
- "epoch": 3.67,
163
- "grad_norm": 2.5816872119903564,
164
- "learning_rate": 4.166666666666667e-06,
165
- "loss": 2.6659,
166
- "step": 5500
167
- },
168
- {
169
- "epoch": 3.67,
170
- "eval_loss": 2.9044690132141113,
171
- "eval_runtime": 151.0962,
172
- "eval_samples_per_second": 19.855,
173
- "eval_steps_per_second": 1.244,
174
- "step": 5500
175
- },
176
- {
177
- "epoch": 4.0,
178
- "grad_norm": 2.135594367980957,
179
- "learning_rate": 0.0,
180
- "loss": 2.6616,
181
- "step": 6000
182
- },
183
- {
184
- "epoch": 4.0,
185
- "eval_loss": 2.9007933139801025,
186
- "eval_runtime": 134.807,
187
- "eval_samples_per_second": 22.254,
188
- "eval_steps_per_second": 1.395,
189
- "step": 6000
190
  }
191
  ],
192
  "logging_steps": 500,
193
- "max_steps": 6000,
194
  "num_input_tokens_seen": 0,
195
- "num_train_epochs": 4,
196
  "save_steps": 500,
197
- "total_flos": 2.6005255225344e+16,
198
  "train_batch_size": 16,
199
  "trial_name": null,
200
  "trial_params": null
 
1
  {
2
+ "best_metric": 3.000533103942871,
3
+ "best_model_checkpoint": "/content/drive/MyDrive/W210 Capstone - Lyric Generation with Melody/loaf/models/lyrlen/bart/bart-finetuned-lyrlen-128-tokens/checkpoint-500",
4
+ "epoch": 0.3333333333333333,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.33,
13
+ "grad_norm": 3.941305637359619,
14
+ "learning_rate": 3.3333333333333335e-05,
15
+ "loss": 3.278,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.33,
20
+ "eval_loss": 3.000533103942871,
21
+ "eval_runtime": 131.9187,
22
+ "eval_samples_per_second": 22.741,
23
+ "eval_steps_per_second": 1.425,
24
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "logging_steps": 500,
28
+ "max_steps": 1500,
29
  "num_input_tokens_seen": 0,
30
+ "num_train_epochs": 1,
31
  "save_steps": 500,
32
+ "total_flos": 2167104602112000.0,
33
  "train_batch_size": 16,
34
  "trial_name": null,
35
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ce994ff3b06e2eb80c3fce5805bc05343bf1dccad088e68bde239a33ab1047d0
3
  size 5432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2befca1929a268b5d8512d98f0406e042b2c61d35610fcaddab16419f0b1149e
3
  size 5432