PatrickML committed on
Commit
cc03254
1 Parent(s): 56eae1b

Training in progress, step 90, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df0c4dd1a2df7a622f7460ba10c96b316ca2e5755f8dff63c88e8647b9c85909
3
  size 966995080
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a770f3185b4bc959d9093b622efcb1c6a0649f574dd4cb2f803a7a0b0f5847e
3
  size 966995080
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:60b759fe6baa60f3f159d4ecc2a95ca6aaa39ba91aded1ce9c8c352852a22ab1
3
  size 1925064044
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9404b3b8073985a0ad59571a09b36826009505c1e25c30b4a3bcaea415bb5aa
3
  size 1925064044
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:41c17a6b8cedc846257b89a1ab8ff684919717e9d5cf8771c309b21cc6315f5a
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5dfc9586ba42710aa4ce0391828f29de924e90b3b8e4b33ef6fb16e4e88e645f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:052f014cd69d6e12b5a27bac6856cb07fa8f819e18d6b5a58297317aa0344599
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df8acfe66f0561ea918b9d7da9a7d6ce9312bdfd8b3c3cd4af6fca3356e7db5c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.02753303964757709,
5
  "eval_steps": 10,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -19,6 +19,12 @@
19
  "learning_rate": 9.200000000000001e-07,
20
  "loss": 2.3577,
21
  "step": 50
 
 
 
 
 
 
22
  }
23
  ],
24
  "logging_steps": 25,
@@ -26,7 +32,7 @@
26
  "num_input_tokens_seen": 0,
27
  "num_train_epochs": 3,
28
  "save_steps": 10,
29
- "total_flos": 2.30868320256e+17,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.04955947136563876,
5
  "eval_steps": 10,
6
+ "global_step": 90,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
19
  "learning_rate": 9.200000000000001e-07,
20
  "loss": 2.3577,
21
  "step": 50
22
+ },
23
+ {
24
+ "epoch": 0.04,
25
+ "learning_rate": 1.42e-06,
26
+ "loss": 1.7546,
27
+ "step": 75
28
  }
29
  ],
30
  "logging_steps": 25,
 
32
  "num_input_tokens_seen": 0,
33
  "num_train_epochs": 3,
34
  "save_steps": 10,
35
+ "total_flos": 4.155629764608e+17,
36
  "trial_name": null,
37
  "trial_params": null
38
  }