mamung committed on
Commit
bfa8161
·
verified ·
1 Parent(s): 03aae2a

Training in progress, step 45, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1e7627d883c62daaa26528ac5de1db758728ffc0e798698cf9dc09e67f3713bd
3
  size 4628218
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ba4e6f31201d085ea85e641dee81f15497cbec2c4bf4ac956f4b70c6557a22c
3
  size 4628218
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aeeb1d37c35dddc3499dfa65807e0f1e540a8c757e9ef0700cce98a54ce426b7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cf420d826a76ebf3d18a2641574c572d67f70da8adcccd5dcab567f4430e22d
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e9573e9d18ad7803441c819253e1705231d5811c0e4f29747c0574440f588f00
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b665a2618254d0693455029a117143eb612684fd8f287fa7207d501afc785521
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06524694154961486,
5
  "eval_steps": 9,
6
- "global_step": 36,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -96,6 +96,28 @@
96
  "eval_samples_per_second": 285.366,
97
  "eval_steps_per_second": 35.78,
98
  "step": 36
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
  }
100
  ],
101
  "logging_steps": 5,
@@ -115,7 +137,7 @@
115
  "attributes": {}
116
  }
117
  },
118
- "total_flos": 647134130995200.0,
119
  "train_batch_size": 8,
120
  "trial_name": null,
121
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.08155867693701857,
5
  "eval_steps": 9,
6
+ "global_step": 45,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
96
  "eval_samples_per_second": 285.366,
97
  "eval_steps_per_second": 35.78,
98
  "step": 36
99
+ },
100
+ {
101
+ "epoch": 0.07249660172179428,
102
+ "grad_norm": NaN,
103
+ "learning_rate": 0.00012803300858899104,
104
+ "loss": 0.0,
105
+ "step": 40
106
+ },
107
+ {
108
+ "epoch": 0.08155867693701857,
109
+ "grad_norm": NaN,
110
+ "learning_rate": 0.00011666776747647015,
111
+ "loss": 0.0,
112
+ "step": 45
113
+ },
114
+ {
115
+ "epoch": 0.08155867693701857,
116
+ "eval_loss": NaN,
117
+ "eval_runtime": 6.8846,
118
+ "eval_samples_per_second": 284.982,
119
+ "eval_steps_per_second": 35.732,
120
+ "step": 45
121
  }
122
  ],
123
  "logging_steps": 5,
 
137
  "attributes": {}
138
  }
139
  },
140
+ "total_flos": 808917663744000.0,
141
  "train_batch_size": 8,
142
  "trial_name": null,
143
  "trial_params": null