ZeroUniqueness committed on
Commit
2c0a422
•
1 Parent(s): cbbaa1a

Training in progress, step 21000

Browse files
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:be1e03ed4e6123418bf0d0eb5aa75d959570f0aeb7b2fc39e2bb25599324a44b
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f715fb1f731563489cd7a96170934d1a2704e0ffdab19dbb6afc7d46ea57e62
3
  size 500897101
{checkpoint-17000 → checkpoint-20000/adapter_model}/README.md RENAMED
File without changes
{checkpoint-17000 → checkpoint-20000/adapter_model}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "v_proj",
18
- "o_proj",
19
- "gate_proj",
20
- "k_proj",
21
  "up_proj",
22
  "down_proj",
23
- "q_proj"
 
 
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
 
 
17
  "up_proj",
18
  "down_proj",
19
+ "q_proj",
20
+ "v_proj",
21
+ "k_proj",
22
+ "gate_proj",
23
+ "o_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-17000 → checkpoint-20000/adapter_model}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa07e2f07041a3d8f612e13ed99c8da5ef7ee1ce18da05ac269f1c3c5b51a5a3
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be1e03ed4e6123418bf0d0eb5aa75d959570f0aeb7b2fc39e2bb25599324a44b
3
  size 500897101
{checkpoint-17000/adapter_model → checkpoint-21000}/README.md RENAMED
File without changes
{checkpoint-17000/adapter_model → checkpoint-21000}/adapter_config.json RENAMED
@@ -14,13 +14,13 @@
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
17
- "v_proj",
18
- "o_proj",
19
- "gate_proj",
20
- "k_proj",
21
  "up_proj",
22
  "down_proj",
23
- "q_proj"
 
 
 
 
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
 
14
  "r": 32,
15
  "revision": null,
16
  "target_modules": [
 
 
 
 
17
  "up_proj",
18
  "down_proj",
19
+ "q_proj",
20
+ "v_proj",
21
+ "k_proj",
22
+ "gate_proj",
23
+ "o_proj"
24
  ],
25
  "task_type": "CAUSAL_LM"
26
  }
{checkpoint-17000/adapter_model → checkpoint-21000}/adapter_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:aa07e2f07041a3d8f612e13ed99c8da5ef7ee1ce18da05ac269f1c3c5b51a5a3
3
  size 500897101
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f715fb1f731563489cd7a96170934d1a2704e0ffdab19dbb6afc7d46ea57e62
3
  size 500897101
{checkpoint-17000 → checkpoint-21000}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:948d7f8c8e41e4fd1ad04b889c66a046fb739839296d9b6c7aef3c461b6bd64a
3
  size 1001723453
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f59cb2f5454ff646db4048b5d9041957f0a425a223e652936fd2fb901f5dbb3c
3
  size 1001723453
{checkpoint-17000 → checkpoint-21000}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6387060fc7a94aabfc7fb3b37a333f35005cbf37d6479887544bc5e9946fe55
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6951e71ceade65d40e7ed9c179d7e73344510ae9dc7634046a8ee3de8abe7606
3
  size 14575
{checkpoint-17000 → checkpoint-21000}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f711972eb3255bd39858384ad6445b09a1044ce2e642c954c1102a132786dc5c
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:699754ccbfc6f07bc2342f0db6f56a69783000f811bfcd3674285767b1132db0
3
  size 627
{checkpoint-17000 → checkpoint-21000}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.6480616927146912,
3
- "best_model_checkpoint": "./qlora-out/checkpoint-17000",
4
- "epoch": 0.6338316990417956,
5
- "global_step": 17000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -1162,11 +1162,283 @@
1162
  "eval_samples_per_second": 0.402,
1163
  "eval_steps_per_second": 0.402,
1164
  "step": 17000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
  }
1166
  ],
1167
  "max_steps": 80463,
1168
  "num_train_epochs": 3,
1169
- "total_flos": 4.764203842017608e+18,
1170
  "trial_name": null,
1171
  "trial_params": null
1172
  }
 
1
  {
2
+ "best_metric": 0.6282580494880676,
3
+ "best_model_checkpoint": "./qlora-out/checkpoint-21000",
4
+ "epoch": 0.782968569404571,
5
+ "global_step": 21000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
1162
  "eval_samples_per_second": 0.402,
1163
  "eval_steps_per_second": 0.402,
1164
  "step": 17000
1165
+ },
1166
+ {
1167
+ "epoch": 0.64,
1168
+ "learning_rate": 0.0001785468302535669,
1169
+ "loss": 0.6363,
1170
+ "step": 17100
1171
+ },
1172
+ {
1173
+ "epoch": 0.64,
1174
+ "learning_rate": 0.00017830455829898317,
1175
+ "loss": 0.6076,
1176
+ "step": 17200
1177
+ },
1178
+ {
1179
+ "epoch": 0.65,
1180
+ "learning_rate": 0.00017806109235147963,
1181
+ "loss": 0.609,
1182
+ "step": 17300
1183
+ },
1184
+ {
1185
+ "epoch": 0.65,
1186
+ "learning_rate": 0.00017781643612344058,
1187
+ "loss": 0.6044,
1188
+ "step": 17400
1189
+ },
1190
+ {
1191
+ "epoch": 0.65,
1192
+ "learning_rate": 0.00017757059334539994,
1193
+ "loss": 0.6262,
1194
+ "step": 17500
1195
+ },
1196
+ {
1197
+ "epoch": 0.66,
1198
+ "learning_rate": 0.00017732356776598403,
1199
+ "loss": 0.6195,
1200
+ "step": 17600
1201
+ },
1202
+ {
1203
+ "epoch": 0.66,
1204
+ "learning_rate": 0.0001770753631518548,
1205
+ "loss": 0.6328,
1206
+ "step": 17700
1207
+ },
1208
+ {
1209
+ "epoch": 0.66,
1210
+ "learning_rate": 0.000176825983287652,
1211
+ "loss": 0.6028,
1212
+ "step": 17800
1213
+ },
1214
+ {
1215
+ "epoch": 0.67,
1216
+ "learning_rate": 0.0001765754319759358,
1217
+ "loss": 0.6159,
1218
+ "step": 17900
1219
+ },
1220
+ {
1221
+ "epoch": 0.67,
1222
+ "learning_rate": 0.0001763237130371287,
1223
+ "loss": 0.6169,
1224
+ "step": 18000
1225
+ },
1226
+ {
1227
+ "epoch": 0.67,
1228
+ "eval_loss": 0.6444052457809448,
1229
+ "eval_runtime": 1304.3701,
1230
+ "eval_samples_per_second": 0.416,
1231
+ "eval_steps_per_second": 0.416,
1232
+ "step": 18000
1233
+ },
1234
+ {
1235
+ "epoch": 0.67,
1236
+ "learning_rate": 0.0001760708303094572,
1237
+ "loss": 0.6183,
1238
+ "step": 18100
1239
+ },
1240
+ {
1241
+ "epoch": 0.68,
1242
+ "learning_rate": 0.00017581678764889324,
1243
+ "loss": 0.6116,
1244
+ "step": 18200
1245
+ },
1246
+ {
1247
+ "epoch": 0.68,
1248
+ "learning_rate": 0.00017556158892909567,
1249
+ "loss": 0.6406,
1250
+ "step": 18300
1251
+ },
1252
+ {
1253
+ "epoch": 0.69,
1254
+ "learning_rate": 0.00017530523804135085,
1255
+ "loss": 0.6223,
1256
+ "step": 18400
1257
+ },
1258
+ {
1259
+ "epoch": 0.69,
1260
+ "learning_rate": 0.00017504773889451361,
1261
+ "loss": 0.628,
1262
+ "step": 18500
1263
+ },
1264
+ {
1265
+ "epoch": 0.69,
1266
+ "learning_rate": 0.00017478909541494736,
1267
+ "loss": 0.6173,
1268
+ "step": 18600
1269
+ },
1270
+ {
1271
+ "epoch": 0.7,
1272
+ "learning_rate": 0.00017452931154646444,
1273
+ "loss": 0.61,
1274
+ "step": 18700
1275
+ },
1276
+ {
1277
+ "epoch": 0.7,
1278
+ "learning_rate": 0.00017426839125026598,
1279
+ "loss": 0.5959,
1280
+ "step": 18800
1281
+ },
1282
+ {
1283
+ "epoch": 0.7,
1284
+ "learning_rate": 0.00017400633850488128,
1285
+ "loss": 0.5979,
1286
+ "step": 18900
1287
+ },
1288
+ {
1289
+ "epoch": 0.71,
1290
+ "learning_rate": 0.00017374315730610745,
1291
+ "loss": 0.6161,
1292
+ "step": 19000
1293
+ },
1294
+ {
1295
+ "epoch": 0.71,
1296
+ "eval_loss": 0.6378119587898254,
1297
+ "eval_runtime": 1283.5987,
1298
+ "eval_samples_per_second": 0.422,
1299
+ "eval_steps_per_second": 0.422,
1300
+ "step": 19000
1301
+ },
1302
+ {
1303
+ "epoch": 0.71,
1304
+ "learning_rate": 0.00017347885166694825,
1305
+ "loss": 0.6213,
1306
+ "step": 19100
1307
+ },
1308
+ {
1309
+ "epoch": 0.72,
1310
+ "learning_rate": 0.00017321342561755297,
1311
+ "loss": 0.6217,
1312
+ "step": 19200
1313
+ },
1314
+ {
1315
+ "epoch": 0.72,
1316
+ "learning_rate": 0.00017294688320515506,
1317
+ "loss": 0.6127,
1318
+ "step": 19300
1319
+ },
1320
+ {
1321
+ "epoch": 0.72,
1322
+ "learning_rate": 0.00017267922849401024,
1323
+ "loss": 0.6145,
1324
+ "step": 19400
1325
+ },
1326
+ {
1327
+ "epoch": 0.73,
1328
+ "learning_rate": 0.00017241046556533472,
1329
+ "loss": 0.5936,
1330
+ "step": 19500
1331
+ },
1332
+ {
1333
+ "epoch": 0.73,
1334
+ "learning_rate": 0.0001721405985172428,
1335
+ "loss": 0.6273,
1336
+ "step": 19600
1337
+ },
1338
+ {
1339
+ "epoch": 0.73,
1340
+ "learning_rate": 0.0001718696314646846,
1341
+ "loss": 0.6059,
1342
+ "step": 19700
1343
+ },
1344
+ {
1345
+ "epoch": 0.74,
1346
+ "learning_rate": 0.000171597568539383,
1347
+ "loss": 0.5934,
1348
+ "step": 19800
1349
+ },
1350
+ {
1351
+ "epoch": 0.74,
1352
+ "learning_rate": 0.000171324413889771,
1353
+ "loss": 0.6243,
1354
+ "step": 19900
1355
+ },
1356
+ {
1357
+ "epoch": 0.75,
1358
+ "learning_rate": 0.00017105017168092808,
1359
+ "loss": 0.6164,
1360
+ "step": 20000
1361
+ },
1362
+ {
1363
+ "epoch": 0.75,
1364
+ "eval_loss": 0.6324757933616638,
1365
+ "eval_runtime": 1266.6769,
1366
+ "eval_samples_per_second": 0.428,
1367
+ "eval_steps_per_second": 0.428,
1368
+ "step": 20000
1369
+ },
1370
+ {
1371
+ "epoch": 0.75,
1372
+ "learning_rate": 0.0001707748460945171,
1373
+ "loss": 0.5953,
1374
+ "step": 20100
1375
+ },
1376
+ {
1377
+ "epoch": 0.75,
1378
+ "learning_rate": 0.0001704984413287202,
1379
+ "loss": 0.6329,
1380
+ "step": 20200
1381
+ },
1382
+ {
1383
+ "epoch": 0.76,
1384
+ "learning_rate": 0.00017022096159817493,
1385
+ "loss": 0.6227,
1386
+ "step": 20300
1387
+ },
1388
+ {
1389
+ "epoch": 0.76,
1390
+ "learning_rate": 0.00016994241113391003,
1391
+ "loss": 0.6022,
1392
+ "step": 20400
1393
+ },
1394
+ {
1395
+ "epoch": 0.76,
1396
+ "learning_rate": 0.0001696627941832808,
1397
+ "loss": 0.604,
1398
+ "step": 20500
1399
+ },
1400
+ {
1401
+ "epoch": 0.77,
1402
+ "learning_rate": 0.0001693821150099044,
1403
+ "loss": 0.6101,
1404
+ "step": 20600
1405
+ },
1406
+ {
1407
+ "epoch": 0.77,
1408
+ "learning_rate": 0.00016910037789359485,
1409
+ "loss": 0.6242,
1410
+ "step": 20700
1411
+ },
1412
+ {
1413
+ "epoch": 0.78,
1414
+ "learning_rate": 0.00016881758713029776,
1415
+ "loss": 0.6096,
1416
+ "step": 20800
1417
+ },
1418
+ {
1419
+ "epoch": 0.78,
1420
+ "learning_rate": 0.0001685337470320248,
1421
+ "loss": 0.5948,
1422
+ "step": 20900
1423
+ },
1424
+ {
1425
+ "epoch": 0.78,
1426
+ "learning_rate": 0.0001682488619267879,
1427
+ "loss": 0.5911,
1428
+ "step": 21000
1429
+ },
1430
+ {
1431
+ "epoch": 0.78,
1432
+ "eval_loss": 0.6282580494880676,
1433
+ "eval_runtime": 1313.1215,
1434
+ "eval_samples_per_second": 0.413,
1435
+ "eval_steps_per_second": 0.413,
1436
+ "step": 21000
1437
  }
1438
  ],
1439
  "max_steps": 80463,
1440
  "num_train_epochs": 3,
1441
+ "total_flos": 5.888261973449687e+18,
1442
  "trial_name": null,
1443
  "trial_params": null
1444
  }
{checkpoint-17000 → checkpoint-21000}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6f52f1dc0e9f02b39b53daa1c87bbc62976c717096fc5d03aab7e139a51a837
3
  size 4027
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f8ca8c55b410908f1a6fb4d78d55fe6aad82bbca76ec8021e18981496f18fa70
3
  size 4027