johannes-garstenauer committed on
Commit
2d4f4f8
1 Parent(s): b468397

Training in progress, step 10074, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a02998f7e9d94c7f46713ace0e277bc3596448b515038d4f707450c23811962d
3
  size 532568837
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:15a7cea6ce4b9d8dac48e585a7abea04090775d79128f2909f7e9d7925c29749
3
  size 532568837
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:396b2e01f369a79c1a52b3ef3823ed8de1e99074cd23b445cb60415bde158199
3
  size 266276525
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dfeff1913f0bc2395f345ab6145fab2f726a75124923e2a8674e4e8647f93c8
3
  size 266276525
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b9204453e030579bdd226736775f8c6095e16b9f2a366dfe9186190d57314c68
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e7bf973ea7de736bb327ffc19187b92d63e286f116b6db2af5a5a588f3c2508
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6575d2c3a8439954baf4c55941916c7bf669136fc2605f9e90afd2ab3c947992
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6f9f3d403b37f48f67eb536bde724d16ba32882bb5029b456fb2d29d78a83c80
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7502904638484226,
5
  "eval_steps": 500,
6
- "global_step": 8395,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -103,13 +103,37 @@
103
  "learning_rate": 3.8083534423689934e-05,
104
  "loss": 0.011,
105
  "step": 8000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
  }
107
  ],
108
  "logging_steps": 500,
109
  "max_steps": 33567,
110
  "num_train_epochs": 3,
111
  "save_steps": 1679,
112
- "total_flos": 1.423517834182656e+17,
113
  "trial_name": null,
114
  "trial_params": null
115
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9003485566181071,
5
  "eval_steps": 500,
6
+ "global_step": 10074,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
103
  "learning_rate": 3.8083534423689934e-05,
104
  "loss": 0.011,
105
  "step": 8000
106
+ },
107
+ {
108
+ "epoch": 0.76,
109
+ "learning_rate": 3.7338755325170554e-05,
110
+ "loss": 0.0094,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 0.8,
115
+ "learning_rate": 3.659397622665118e-05,
116
+ "loss": 0.0101,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 0.85,
121
+ "learning_rate": 3.584919712813179e-05,
122
+ "loss": 0.0106,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 0.89,
127
+ "learning_rate": 3.510441802961242e-05,
128
+ "loss": 0.0104,
129
+ "step": 10000
130
  }
131
  ],
132
  "logging_steps": 500,
133
  "max_steps": 33567,
134
  "num_train_epochs": 3,
135
  "save_steps": 1679,
136
+ "total_flos": 1.7082214010191872e+17,
137
  "trial_name": null,
138
  "trial_params": null
139
  }