ccore committed on
Commit
14a3284
·
verified ·
1 Parent(s): 50fc6da

Training in progress, epoch 12, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c408f993022d87ffeadd2365ed7494f1b456c5c1291f2bb376a18c621f77183
3
  size 500979600
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06de0c909deed20e1208a403267a8b9becdefebc13a97a5e488e1cc8dbcc736f
3
  size 500979600
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ee0ac80c40130b527841f63c414015f68ceaac0e29ef5ad311b003d0f378d6d
3
  size 1002078330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db172d07973ebe41ada05766c94cc260a12bcbf89680ae87b257a418c9502b38
3
  size 1002078330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:379dad426960eae5c3c9028ee1b85f1d5429a4e2331d6e3a1f81cd737e000d07
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:28d7cc0432a0bb11367a1b81ab0d3d42eac554116c5178476ad84855e3a6093e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b4eac702da66c6639da79428ff8df52283f3e77ca66f222977d4fe4ff13ca1f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef0269bc208414eb97e009c66d90836f5c207f79b2289af505d56f8cafbb21e3
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.3360599279403687,
3
- "best_model_checkpoint": "./opt_trained/checkpoint-242",
4
- "epoch": 11.0,
5
  "eval_steps": 500,
6
- "global_step": 242,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -95,6 +95,14 @@
95
  "eval_samples_per_second": 37.203,
96
  "eval_steps_per_second": 9.301,
97
  "step": 242
 
 
 
 
 
 
 
 
98
  }
99
  ],
100
  "logging_steps": 500,
@@ -114,7 +122,7 @@
114
  "attributes": {}
115
  }
116
  },
117
- "total_flos": 7803621264384000.0,
118
  "train_batch_size": 16,
119
  "trial_name": null,
120
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.3332167863845825,
3
+ "best_model_checkpoint": "./opt_trained/checkpoint-264",
4
+ "epoch": 12.0,
5
  "eval_steps": 500,
6
+ "global_step": 264,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
95
  "eval_samples_per_second": 37.203,
96
  "eval_steps_per_second": 9.301,
97
  "step": 242
98
+ },
99
+ {
100
+ "epoch": 12.0,
101
+ "eval_loss": 1.3332167863845825,
102
+ "eval_runtime": 8.1186,
103
+ "eval_samples_per_second": 36.952,
104
+ "eval_steps_per_second": 9.238,
105
+ "step": 264
106
  }
107
  ],
108
  "logging_steps": 500,
 
122
  "attributes": {}
123
  }
124
  },
125
+ "total_flos": 8513041379328000.0,
126
  "train_batch_size": 16,
127
  "trial_name": null,
128
  "trial_params": null