mgh6 commited on
Commit
b6627f8
1 Parent(s): 87e0317

Training in progress, step 5120, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2a79993ca3bb2e40d715e49b6365049f27102e49dd8b3a9ce020c6ea5a9f9fe9
3
  size 4725595416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:869b484e58461f1536cc21d599303cb514c3d752e29ef44231f6d6c363605817
3
  size 4725595416
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1bf2691edb5f20acb6de9eb1f6120c2449bed48ca00eecc968c5be167084b7bb
3
  size 9179193343
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14a089b06ff32225e1d8e28e6b6c1cab9f01e70af7cbc2eac64ef56b2989354d
3
  size 9179193343
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c349c1691bbeda5a6b16abd459bd4b17c698c1ae8b87b93b48229ee14acd38e
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc1239486b7f83e4a2231cde24a50b503b22ee79d6ee232760274da141c18674
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28472ecbb49d175fddb5467d2d36c375ce76e352a7c4d1642d73ecb32735946a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ce8999a5d1b14a256d594f72ba3d10015736fefa5fd7057a03491428983da78
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 19020.044921875,
3
- "best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-2560",
4
- "epoch": 0.18914450899082907,
5
  "eval_steps": 512,
6
- "global_step": 2560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -107,6 +107,106 @@
107
  "eval_samples_per_second": 69.222,
108
  "eval_steps_per_second": 69.222,
109
  "step": 2560
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  }
111
  ],
112
  "logging_steps": 256,
 
1
  {
2
+ "best_metric": 4288.33056640625,
3
+ "best_model_checkpoint": "mgh6/TCS_Pairing_VAE/checkpoint-5120",
4
+ "epoch": 0.37828901798165815,
5
  "eval_steps": 512,
6
+ "global_step": 5120,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
107
  "eval_samples_per_second": 69.222,
108
  "eval_steps_per_second": 69.222,
109
  "step": 2560
110
+ },
111
+ {
112
+ "epoch": 0.21,
113
+ "learning_rate": 9.58386286389833e-05,
114
+ "loss": 15475.9717,
115
+ "step": 2816
116
+ },
117
+ {
118
+ "epoch": 0.23,
119
+ "learning_rate": 9.546032215161815e-05,
120
+ "loss": 11322.8867,
121
+ "step": 3072
122
+ },
123
+ {
124
+ "epoch": 0.23,
125
+ "eval_loss": 8387.529296875,
126
+ "eval_runtime": 56.5145,
127
+ "eval_samples_per_second": 60.126,
128
+ "eval_steps_per_second": 60.126,
129
+ "step": 3072
130
+ },
131
+ {
132
+ "epoch": 0.25,
133
+ "learning_rate": 9.5082015664253e-05,
134
+ "loss": 8684.2373,
135
+ "step": 3328
136
+ },
137
+ {
138
+ "epoch": 0.26,
139
+ "learning_rate": 9.470370917688785e-05,
140
+ "loss": 6917.9409,
141
+ "step": 3584
142
+ },
143
+ {
144
+ "epoch": 0.26,
145
+ "eval_loss": 5089.6796875,
146
+ "eval_runtime": 52.3172,
147
+ "eval_samples_per_second": 64.95,
148
+ "eval_steps_per_second": 64.95,
149
+ "step": 3584
150
+ },
151
+ {
152
+ "epoch": 0.28,
153
+ "learning_rate": 9.43254026895227e-05,
154
+ "loss": 6025.4263,
155
+ "step": 3840
156
+ },
157
+ {
158
+ "epoch": 0.3,
159
+ "learning_rate": 9.394709620215754e-05,
160
+ "loss": 5538.1548,
161
+ "step": 4096
162
+ },
163
+ {
164
+ "epoch": 0.3,
165
+ "eval_loss": 4470.64990234375,
166
+ "eval_runtime": 49.8023,
167
+ "eval_samples_per_second": 68.23,
168
+ "eval_steps_per_second": 68.23,
169
+ "step": 4096
170
+ },
171
+ {
172
+ "epoch": 0.32,
173
+ "learning_rate": 9.356878971479238e-05,
174
+ "loss": 5257.3623,
175
+ "step": 4352
176
+ },
177
+ {
178
+ "epoch": 0.34,
179
+ "learning_rate": 9.319048322742722e-05,
180
+ "loss": 5375.1353,
181
+ "step": 4608
182
+ },
183
+ {
184
+ "epoch": 0.34,
185
+ "eval_loss": 4827.8271484375,
186
+ "eval_runtime": 49.3721,
187
+ "eval_samples_per_second": 68.824,
188
+ "eval_steps_per_second": 68.824,
189
+ "step": 4608
190
+ },
191
+ {
192
+ "epoch": 0.36,
193
+ "learning_rate": 9.281217674006207e-05,
194
+ "loss": 5494.5615,
195
+ "step": 4864
196
+ },
197
+ {
198
+ "epoch": 0.38,
199
+ "learning_rate": 9.243387025269692e-05,
200
+ "loss": 5258.2065,
201
+ "step": 5120
202
+ },
203
+ {
204
+ "epoch": 0.38,
205
+ "eval_loss": 4288.33056640625,
206
+ "eval_runtime": 51.0091,
207
+ "eval_samples_per_second": 66.616,
208
+ "eval_steps_per_second": 66.616,
209
+ "step": 5120
210
  }
211
  ],
212
  "logging_steps": 256,