TracyTank commited on
Commit
d453081
·
verified ·
1 Parent(s): 69d9db4

Training in progress, step 166, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9cfa661c54339a670d279a82539151fa79137d5093e0b025188d9e50254cec5
3
  size 335604696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2727cf1162bcdc2ea431dcc02d9c762ca7acfebaebb93304ffda64edf7ffe3ec
3
  size 335604696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4ca9111e3ea091b87e489018a9e97525952d3864a098a50a52e754f3f22c360
3
  size 671466706
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa5547d1958277542f79ab9a0f0261b7802aa5ff9e2e55c2d97095651e68148c
3
  size 671466706
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6e7c49e05b21e3653355619157ebf34445c75bc033c13bc3bd09fcb19f06f8d1
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1a837812d656bbd27708f52fd6b43ec1c9f0520c4b827efa1781caaf5242150d
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:211ff8c0d5e10554f2ceb347d0f6acd4f3acf6ba9002d45319f7c3abcda93013
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18e4a513de2adf3f7e2c1ac68c21245a28b4c2292ee16f53025ec71806c9bb44
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c2a4d4c9298a700cb8145e9be723fe53addaa36487b10ce757075d75d235e7dc
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cf63c606aaa799919b498936a8161d20f282cdca30eb1326c2e12fad8e2ae60d
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f7236385d7d483948c57cf28bf8bd5f038c9333fa029cab83bf7bdfc42e8b4ab
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3209aba9b70f7586de5581283e2884742c6c9595707339fe0f46d0ca4b014b83
3
  size 15024
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a51a61d26673f45740087c7c6bc461057f2353ff1696dc358d96750878ca6351
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fec1e40c6dba74f1038e9d765519a0d1df17ea9f1c2a8daabaf19c4ba3779056
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.40452006459236145,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
- "epoch": 0.24324735215121876,
5
  "eval_steps": 25,
6
- "global_step": 150,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1113,6 +1113,118 @@
1113
  "eval_samples_per_second": 32.208,
1114
  "eval_steps_per_second": 8.374,
1115
  "step": 150
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1116
  }
1117
  ],
1118
  "logging_steps": 1,
@@ -1136,12 +1248,12 @@
1136
  "should_evaluate": false,
1137
  "should_log": false,
1138
  "should_save": true,
1139
- "should_training_stop": false
1140
  },
1141
  "attributes": {}
1142
  }
1143
  },
1144
- "total_flos": 3.580912017919181e+18,
1145
  "train_batch_size": 1,
1146
  "trial_name": null,
1147
  "trial_params": null
 
1
  {
2
  "best_metric": 0.40452006459236145,
3
  "best_model_checkpoint": "miner_id_24/checkpoint-150",
4
+ "epoch": 0.2691937363806821,
5
  "eval_steps": 25,
6
+ "global_step": 166,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1113
  "eval_samples_per_second": 32.208,
1114
  "eval_steps_per_second": 8.374,
1115
  "step": 150
1116
+ },
1117
+ {
1118
+ "epoch": 0.24486900116556023,
1119
+ "grad_norm": 0.14199906587600708,
1120
+ "learning_rate": 1.1986684236740763e-05,
1121
+ "loss": 0.4249,
1122
+ "step": 151
1123
+ },
1124
+ {
1125
+ "epoch": 0.24649065017990168,
1126
+ "grad_norm": 0.16196103394031525,
1127
+ "learning_rate": 1.1732279305266082e-05,
1128
+ "loss": 0.4533,
1129
+ "step": 152
1130
+ },
1131
+ {
1132
+ "epoch": 0.24811229919424316,
1133
+ "grad_norm": 0.1596025824546814,
1134
+ "learning_rate": 1.1494979854388329e-05,
1135
+ "loss": 0.41,
1136
+ "step": 153
1137
+ },
1138
+ {
1139
+ "epoch": 0.2497339482085846,
1140
+ "grad_norm": 0.16512756049633026,
1141
+ "learning_rate": 1.1274879698181547e-05,
1142
+ "loss": 0.4556,
1143
+ "step": 154
1144
+ },
1145
+ {
1146
+ "epoch": 0.25135559722292605,
1147
+ "grad_norm": 0.15834008157253265,
1148
+ "learning_rate": 1.1072065851142718e-05,
1149
+ "loss": 0.4458,
1150
+ "step": 155
1151
+ },
1152
+ {
1153
+ "epoch": 0.25297724623726753,
1154
+ "grad_norm": 0.1588006466627121,
1155
+ "learning_rate": 1.0886618493791376e-05,
1156
+ "loss": 0.431,
1157
+ "step": 156
1158
+ },
1159
+ {
1160
+ "epoch": 0.254598895251609,
1161
+ "grad_norm": 0.15690577030181885,
1162
+ "learning_rate": 1.0718610940971071e-05,
1163
+ "loss": 0.417,
1164
+ "step": 157
1165
+ },
1166
+ {
1167
+ "epoch": 0.2562205442659504,
1168
+ "grad_norm": 0.16939620673656464,
1169
+ "learning_rate": 1.0568109612865e-05,
1170
+ "loss": 0.4517,
1171
+ "step": 158
1172
+ },
1173
+ {
1174
+ "epoch": 0.2578421932802919,
1175
+ "grad_norm": 0.16963833570480347,
1176
+ "learning_rate": 1.0435174008737416e-05,
1177
+ "loss": 0.4638,
1178
+ "step": 159
1179
+ },
1180
+ {
1181
+ "epoch": 0.2594638422946334,
1182
+ "grad_norm": 0.18084716796875,
1183
+ "learning_rate": 1.0319856683411197e-05,
1184
+ "loss": 0.4172,
1185
+ "step": 160
1186
+ },
1187
+ {
1188
+ "epoch": 0.2610854913089748,
1189
+ "grad_norm": 0.20121614634990692,
1190
+ "learning_rate": 1.0222203226490767e-05,
1191
+ "loss": 0.4177,
1192
+ "step": 161
1193
+ },
1194
+ {
1195
+ "epoch": 0.26270714032331627,
1196
+ "grad_norm": 0.21996457874774933,
1197
+ "learning_rate": 1.0142252244338688e-05,
1198
+ "loss": 0.4157,
1199
+ "step": 162
1200
+ },
1201
+ {
1202
+ "epoch": 0.26432878933765774,
1203
+ "grad_norm": 0.1590254306793213,
1204
+ "learning_rate": 1.0080035344813017e-05,
1205
+ "loss": 0.4034,
1206
+ "step": 163
1207
+ },
1208
+ {
1209
+ "epoch": 0.26595043835199916,
1210
+ "grad_norm": 0.13573700189590454,
1211
+ "learning_rate": 1.0035577124771419e-05,
1212
+ "loss": 0.4021,
1213
+ "step": 164
1214
+ },
1215
+ {
1216
+ "epoch": 0.26757208736634064,
1217
+ "grad_norm": 0.15125057101249695,
1218
+ "learning_rate": 1.0008895160347052e-05,
1219
+ "loss": 0.4395,
1220
+ "step": 165
1221
+ },
1222
+ {
1223
+ "epoch": 0.2691937363806821,
1224
+ "grad_norm": 0.16019335389137268,
1225
+ "learning_rate": 1e-05,
1226
+ "loss": 0.4371,
1227
+ "step": 166
1228
  }
1229
  ],
1230
  "logging_steps": 1,
 
1248
  "should_evaluate": false,
1249
  "should_log": false,
1250
  "should_save": true,
1251
+ "should_training_stop": true
1252
  },
1253
  "attributes": {}
1254
  }
1255
  },
1256
+ "total_flos": 3.962875966497227e+18,
1257
  "train_batch_size": 1,
1258
  "trial_name": null,
1259
  "trial_params": null