MeedoSam committed on
Commit
fa164d6
1 Parent(s): 6cc3f95

Uploaded checkpoint-17500

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:948fb1e98580e330d4d842e33a37f9084da9e25cc7f48a4d02a36faaddab5b4e
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a6cdbde4f4ccad23f4f917c627b157da08e6dc847e2ed92ac14c89589880841
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:099dcb4361bb2049aa6db3b6e81a9f34b1202083b425b25c3af7c20f9259fe22
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4075ec4b36d7b30f8711cdc8411eb66cf7511268abac12ba6b2db316a1ee7dd7
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:099feb85b2265fa46cec748072b982183c4dbb871e47eb59c01b10707a2a6958
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81db4d08b4f0905e940e600b34d55e9d49b371b457fe52a1dd9b0fc61bd94779
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c59c86cac1045db21cd2eb48ce1a34a6e8c676f8d6333a24ac6d07a63dbb2c3
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a95ec91bf6b79176b87aeb9e7899423b6eea55e3aa8c4701936014715782af7e
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.375,
5
  "eval_steps": 2500,
6
- "global_step": 15000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -160,6 +160,28 @@
160
  "eval_samples_per_second": 4.954,
161
  "eval_steps_per_second": 4.954,
162
  "step": 15000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  }
164
  ],
165
  "logging_steps": 1000,
@@ -167,7 +189,7 @@
167
  "num_input_tokens_seen": 0,
168
  "num_train_epochs": 1,
169
  "save_steps": 2500,
170
- "total_flos": 2.4153188990976e+17,
171
  "train_batch_size": 1,
172
  "trial_name": null,
173
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.4375,
5
  "eval_steps": 2500,
6
+ "global_step": 17500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
160
  "eval_samples_per_second": 4.954,
161
  "eval_steps_per_second": 4.954,
162
  "step": 15000
163
+ },
164
+ {
165
+ "epoch": 0.4,
166
+ "grad_norm": 5.487946033477783,
167
+ "learning_rate": 1.1017971014492755e-05,
168
+ "loss": 1.3825,
169
+ "step": 16000
170
+ },
171
+ {
172
+ "epoch": 0.42,
173
+ "grad_norm": 2.9339723587036133,
174
+ "learning_rate": 1.0438260869565218e-05,
175
+ "loss": 1.373,
176
+ "step": 17000
177
+ },
178
+ {
179
+ "epoch": 0.44,
180
+ "eval_loss": 1.371016263961792,
181
+ "eval_runtime": 201.6518,
182
+ "eval_samples_per_second": 4.959,
183
+ "eval_steps_per_second": 4.959,
184
+ "step": 17500
185
  }
186
  ],
187
  "logging_steps": 1000,
 
189
  "num_input_tokens_seen": 0,
190
  "num_train_epochs": 1,
191
  "save_steps": 2500,
192
+ "total_flos": 2.8178720489472e+17,
193
  "train_batch_size": 1,
194
  "trial_name": null,
195
  "trial_params": null