lesso18 committed on
Commit
131aaed
·
verified ·
1 Parent(s): acc8bb5

Training in progress, step 200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:329779a9d75f81c59138e004c74680cb2f6109c19967aebb8a6af114432882d1
3
  size 671149168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:392586bbd1d2e0bfd03a12554658d1e295050b4c5ddf2a9b1f635b349bbf03df
3
  size 671149168
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:664c4791853bfcdd7ad0bd0ff4f7aa982b6789073a49409259639cbe20c19306
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9cffc1441a3bb67007c6dc9aadc468cd33a3680bdb5d757caafe5fc9ae7d64cd
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:818f16670711f557472473aa4463545c4162b79354ecd58d3965097bd34e18c6
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b05e3ff61da4b09e0b44b4180d36a0787a179101f6710bc48bb92a55540b2bff
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4ffe1cc6011db9bcd26d2a5ee9a2f60bf90f43b3c6aed165ee32997fe344b31
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c41568ec8d15c28a2145796b65c5977bed142a18699c02f8db4b41d7a5440c4c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 2.2884583473205566,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.054122316435143425,
5
  "eval_steps": 50,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -144,6 +144,49 @@
144
  "eval_samples_per_second": 15.957,
145
  "eval_steps_per_second": 3.993,
146
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
147
  }
148
  ],
149
  "logging_steps": 10,
@@ -172,7 +215,7 @@
172
  "attributes": {}
173
  }
174
  },
175
- "total_flos": 2.922745058820096e+16,
176
  "train_batch_size": 4,
177
  "trial_name": null,
178
  "trial_params": null
 
1
  {
2
+ "best_metric": 2.2128450870513916,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-200",
4
+ "epoch": 0.07216308858019123,
5
  "eval_steps": 50,
6
+ "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
144
  "eval_samples_per_second": 15.957,
145
  "eval_steps_per_second": 3.993,
146
  "step": 150
147
+ },
148
+ {
149
+ "epoch": 0.057730470864152984,
150
+ "grad_norm": 1.366658329963684,
151
+ "learning_rate": 0.00018740803823691298,
152
+ "loss": 2.172,
153
+ "step": 160
154
+ },
155
+ {
156
+ "epoch": 0.06133862529316255,
157
+ "grad_norm": 1.5560624599456787,
158
+ "learning_rate": 0.00018193523609311556,
159
+ "loss": 2.3027,
160
+ "step": 170
161
+ },
162
+ {
163
+ "epoch": 0.06494677972217211,
164
+ "grad_norm": 1.9186638593673706,
165
+ "learning_rate": 0.00017610710081049675,
166
+ "loss": 2.1255,
167
+ "step": 180
168
+ },
169
+ {
170
+ "epoch": 0.06855493415118168,
171
+ "grad_norm": 2.413241386413574,
172
+ "learning_rate": 0.00016995202647831142,
173
+ "loss": 2.1701,
174
+ "step": 190
175
+ },
176
+ {
177
+ "epoch": 0.07216308858019123,
178
+ "grad_norm": 4.694942474365234,
179
+ "learning_rate": 0.00016350000000000002,
180
+ "loss": 2.0427,
181
+ "step": 200
182
+ },
183
+ {
184
+ "epoch": 0.07216308858019123,
185
+ "eval_loss": 2.2128450870513916,
186
+ "eval_runtime": 73.3767,
187
+ "eval_samples_per_second": 15.904,
188
+ "eval_steps_per_second": 3.979,
189
+ "step": 200
190
  }
191
  ],
192
  "logging_steps": 10,
 
215
  "attributes": {}
216
  }
217
  },
218
+ "total_flos": 3.893850675137741e+16,
219
  "train_batch_size": 4,
220
  "trial_name": null,
221
  "trial_params": null