farmery committed fa3662c (verified) · 1 parent: f535738

Training in progress, step 166, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:331192ac1f6bf314815e5b10c88737996486d390146d8e785619ad7dc5ff79e5
+oid sha256:f794e5c83f0b343f9e3a3ce74fafedfaa9b179bd2557cf18c4129319f11b3194
 size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:631686b4569ed6de8de2e33d5b47bc87418f2cf1a3133003d646c996e7dfb4d8
+oid sha256:8167e5b1e056ab3bad805ec040e7b1258185d6aef7a734e363c7279f173e174a
 size 335922386
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:044ff09e8958661e9d77f3530a25f53b93580ac58f0ab4d62554f82f49c61f99
+oid sha256:30af804811173e42e95fdca064724f01624f4a7180f01f32ec76aecfeaf944fa
 size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91621a6713336b0ef6a2d021e0a0ad7048da717f4aa80b55d1dfc17fdad2901e
+oid sha256:3751d7c90b6106e2e2b9533ec2331aad6e66c18cc58e059c9af17b2f73c9d0de
 size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8a8149c42bf13992ca336b5c00aa8ceaa16f084d366bdf1be14868e30d2b8361
+oid sha256:376e62010036f3bdd3a656eef389aed66bdbaf88a636951248707427be62583b
 size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3d8fe7a2cad1ff69a24b74a2c491e671d4a204c81bb46fd633038e8544921048
+oid sha256:c5376e5b027ed022c853717be24ac05fc92b2b9af8b60781eed3e4e4ee84ebe9
 size 15024
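
The four rng_state_*.pth files above are the per-process random-number-generator snapshots that transformers' Trainer saves for what appears to be a 4-process run, so resuming from this checkpoint reproduces the same data order and dropout masks on every rank. A minimal sketch for inspecting one of them (the dictionary keys named in the comment are an assumption and can differ between transformers versions):

import torch

# Load rank 0's RNG snapshot from the checkpoint directory.
# transformers stores a plain dict; typical keys are "python", "numpy",
# "cpu" and "cuda" (assumed here; they vary by version and hardware).
state = torch.load(
    "last-checkpoint/rng_state_0.pth",
    map_location="cpu",
    weights_only=False,  # the file contains non-tensor Python state
)

for key, value in state.items():
    # Tensor entries are generator states; the rest are Python/NumPy objects.
    summary = tuple(value.shape) if torch.is_tensor(value) else type(value).__name__
    print(f"{key}: {summary}")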
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a3ae2800eef0245add9e0e1be7ec7a57e0cd3c41c460c9546ae4fa2e57287fa9
+oid sha256:fec1e40c6dba74f1038e9d765519a0d1df17ea9f1c2a8daabaf19c4ba3779056
 size 1064
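
Every binary file above is tracked with Git LFS, so the commit only rewrites the small pointer files: the "oid sha256:" line now names the new blob while the size stays the same. A minimal sketch for checking that a downloaded blob actually matches its pointer (the helper names and paths are ad hoc examples, not part of any Git LFS tooling; it assumes the pointer text was fetched without LFS smudging):

import hashlib
from pathlib import Path

def read_pointer(pointer_text: str) -> dict:
    # A Git LFS pointer is "key value" per line (version, oid, size).
    fields = {}
    for line in pointer_text.strip().splitlines():
        key, _, value = line.partition(" ")
        fields[key] = value
    return fields

def matches_pointer(pointer_path: str, blob_path: str) -> bool:
    # True if the blob's SHA-256 digest and byte size match the pointer.
    fields = read_pointer(Path(pointer_path).read_text())
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])
    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size

# Example (hypothetical paths): verify the adapter weights fetched for this commit.
print(matches_pointer("pointers/adapter_model.safetensors",
                      "last-checkpoint/adapter_model.safetensors"))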
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": 1.8791134357452393,
   "best_model_checkpoint": "miner_id_24/checkpoint-150",
-  "epoch": 0.07675517497781295,
+  "epoch": 0.084942393642113,
   "eval_steps": 25,
-  "global_step": 150,
+  "global_step": 166,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1113,6 +1113,118 @@
       "eval_samples_per_second": 32.565,
       "eval_steps_per_second": 8.467,
       "step": 150
+    },
+    {
+      "epoch": 0.07726687614433171,
+      "grad_norm": 0.33062976598739624,
+      "learning_rate": 1.1937684892050604e-05,
+      "loss": 1.3588,
+      "step": 151
+    },
+    {
+      "epoch": 0.07777857731085046,
+      "grad_norm": 0.5277450084686279,
+      "learning_rate": 1.168951435958588e-05,
+      "loss": 1.7291,
+      "step": 152
+    },
+    {
+      "epoch": 0.07829027847736922,
+      "grad_norm": 0.5764089226722717,
+      "learning_rate": 1.1458040843788312e-05,
+      "loss": 1.8712,
+      "step": 153
+    },
+    {
+      "epoch": 0.07880197964388796,
+      "grad_norm": 0.6534531712532043,
+      "learning_rate": 1.1243353582104556e-05,
+      "loss": 1.9575,
+      "step": 154
+    },
+    {
+      "epoch": 0.07931368081040673,
+      "grad_norm": 0.7147605419158936,
+      "learning_rate": 1.1045535340560744e-05,
+      "loss": 1.9211,
+      "step": 155
+    },
+    {
+      "epoch": 0.07982538197692547,
+      "grad_norm": 0.7351657748222351,
+      "learning_rate": 1.0864662381854632e-05,
+      "loss": 1.9539,
+      "step": 156
+    },
+    {
+      "epoch": 0.08033708314344423,
+      "grad_norm": 0.8152849674224854,
+      "learning_rate": 1.070080443595488e-05,
+      "loss": 2.0232,
+      "step": 157
+    },
+    {
+      "epoch": 0.08084878430996298,
+      "grad_norm": 0.9168938398361206,
+      "learning_rate": 1.0554024673218807e-05,
+      "loss": 1.9743,
+      "step": 158
+    },
+    {
+      "epoch": 0.08136048547648174,
+      "grad_norm": 1.0510733127593994,
+      "learning_rate": 1.0424379680039025e-05,
+      "loss": 2.0778,
+      "step": 159
+    },
+    {
+      "epoch": 0.08187218664300049,
+      "grad_norm": 1.2245820760726929,
+      "learning_rate": 1.0311919437028318e-05,
+      "loss": 2.2151,
+      "step": 160
+    },
+    {
+      "epoch": 0.08238388780951925,
+      "grad_norm": 1.3239753246307373,
+      "learning_rate": 1.0216687299751144e-05,
+      "loss": 2.2195,
+      "step": 161
+    },
+    {
+      "epoch": 0.08289558897603799,
+      "grad_norm": 1.7548948526382446,
+      "learning_rate": 1.0138719982009242e-05,
+      "loss": 2.5937,
+      "step": 162
+    },
+    {
+      "epoch": 0.08340729014255675,
+      "grad_norm": 0.4118451774120331,
+      "learning_rate": 1.007804754168779e-05,
+      "loss": 1.4343,
+      "step": 163
+    },
+    {
+      "epoch": 0.0839189913090755,
+      "grad_norm": 0.5209285616874695,
+      "learning_rate": 1.003469336916747e-05,
+      "loss": 1.7331,
+      "step": 164
+    },
+    {
+      "epoch": 0.08443069247559425,
+      "grad_norm": 0.5564490556716919,
+      "learning_rate": 1.0008674178307085e-05,
+      "loss": 1.8639,
+      "step": 165
+    },
+    {
+      "epoch": 0.084942393642113,
+      "grad_norm": 0.6351719498634338,
+      "learning_rate": 1e-05,
+      "loss": 1.9287,
+      "step": 166
     }
   ],
   "logging_steps": 1,
@@ -1136,12 +1248,12 @@
         "should_evaluate": false,
         "should_log": false,
         "should_save": true,
-        "should_training_stop": false
+        "should_training_stop": true
       },
       "attributes": {}
     }
   },
-  "total_flos": 3.563655565173326e+18,
+  "total_flos": 3.944151006450811e+18,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null