ZeroUniqueness
committed on
Commit
•
90d31cb
1
Parent(s):
b9dd262
Training in progress, step 6300
Browse files
- adapter_model.bin +1 -1
- {checkpoint-5900 → checkpoint-6200/adapter_model}/README.md +0 -0
- {checkpoint-5900 → checkpoint-6200/adapter_model}/adapter_config.json +5 -5
- {checkpoint-5900 → checkpoint-6200/adapter_model}/adapter_model.bin +1 -1
- {checkpoint-5900/adapter_model → checkpoint-6300}/README.md +0 -0
- {checkpoint-5900/adapter_model → checkpoint-6300}/adapter_config.json +5 -5
- {checkpoint-5900/adapter_model → checkpoint-6300}/adapter_model.bin +1 -1
- {checkpoint-5900 → checkpoint-6300}/optimizer.pt +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_0.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_1.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_10.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_11.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_12.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_13.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_2.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_3.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_4.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_5.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_6.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_7.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_8.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/rng_state_9.pth +1 -1
- {checkpoint-5900 → checkpoint-6300}/scheduler.pt +1 -1
- {checkpoint-5900 → checkpoint-6300}/trainer_state.json +107 -3
- {checkpoint-5900 → checkpoint-6300}/training_args.bin +1 -1
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a03009057bc17a55fb03559fceee9416e58df44f2b8cad404853c77ef5e25ff
|
3 |
size 500897101
|
{checkpoint-5900 → checkpoint-6200/adapter_model}/README.md
RENAMED
File without changes
|
{checkpoint-5900 → checkpoint-6200/adapter_model}/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"q_proj",
|
18 |
-
"v_proj",
|
19 |
-
"gate_proj",
|
20 |
"up_proj",
|
21 |
-
"
|
|
|
|
|
22 |
"k_proj",
|
23 |
-
"
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"up_proj",
|
18 |
+
"gate_proj",
|
19 |
+
"down_proj",
|
20 |
+
"q_proj",
|
21 |
"k_proj",
|
22 |
+
"o_proj",
|
23 |
+
"v_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-5900 → checkpoint-6200/adapter_model}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3febec860772b82c489867736d215315d0909ffc43a3ba7b9fbc1d0b41fd7a20
|
3 |
size 500897101
|
{checkpoint-5900/adapter_model → checkpoint-6300}/README.md
RENAMED
File without changes
|
{checkpoint-5900/adapter_model → checkpoint-6300}/adapter_config.json
RENAMED
@@ -14,13 +14,13 @@
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
17 |
-
"q_proj",
|
18 |
-
"v_proj",
|
19 |
-
"gate_proj",
|
20 |
"up_proj",
|
21 |
-
"
|
|
|
|
|
22 |
"k_proj",
|
23 |
-
"
|
|
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
|
|
14 |
"r": 32,
|
15 |
"revision": null,
|
16 |
"target_modules": [
|
|
|
|
|
|
|
17 |
"up_proj",
|
18 |
+
"gate_proj",
|
19 |
+
"down_proj",
|
20 |
+
"q_proj",
|
21 |
"k_proj",
|
22 |
+
"o_proj",
|
23 |
+
"v_proj"
|
24 |
],
|
25 |
"task_type": "CAUSAL_LM"
|
26 |
}
|
{checkpoint-5900/adapter_model → checkpoint-6300}/adapter_model.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 500897101
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7a03009057bc17a55fb03559fceee9416e58df44f2b8cad404853c77ef5e25ff
|
3 |
size 500897101
|
{checkpoint-5900 → checkpoint-6300}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1001752701
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:95b7209d5ad4284033cb4767ae444e7a78eba53804e8f6dfd2ae1d008b403ecb
|
3 |
size 1001752701
|
{checkpoint-5900 → checkpoint-6300}/rng_state_0.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4b0892c501b6dd52efec58730585273a276e5cf9f6937d96e9fa38ae5af9abd6
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_1.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed1c7ad44d4c44043a9a1040ab7a8d9bf8ccb6a798963c6beec8678ba53a99d5
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_10.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:51696bdc2a942c4b6659d0c7c032d47971cc912461202edc9cbf84be5b00b9d4
|
3 |
size 27789
|
{checkpoint-5900 → checkpoint-6300}/rng_state_11.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf2dc121d536b338f7f7ab71844d47b47fa50424082757312612a1ffb43ce6d6
|
3 |
size 27789
|
{checkpoint-5900 → checkpoint-6300}/rng_state_12.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:600cbb989fd1bd997e07afb75b51b0e9bb6bba91f6f55fd115d8b32524b2c2a2
|
3 |
size 27789
|
{checkpoint-5900 → checkpoint-6300}/rng_state_13.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27789
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4f2f55cc04908fd235b9c2e1fae05ac4e683f052b4ea8b4e70cb8d917af90168
|
3 |
size 27789
|
{checkpoint-5900 → checkpoint-6300}/rng_state_2.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:daf71be757b88dd90b278fe0d6abf41fdfd54d283da14639c698e92c6ee6204c
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_3.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d8d3a91b3606a86dfbe9591df14b8680debec4ee7ccd893c21fc87ce694c0de6
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_4.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d6af24a322399a9faa75e21470cca01ffb561d5fc8d90ce309043c8aede63c59
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_5.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6e40c161f1898bfa1d6decd05f837beb821a821ae27f1e34a964a8ec433edd39
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_6.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d5a9e186bb4ebc2493f0fa088e88be12fd79ba92df4b0d1984d04b88fb44615e
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_7.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5389b58d9b2d1b81fdd14d9dfa214e64e33c18b1e015479871952bbf5fbc9a9f
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_8.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:964ebee03695528c749c601db479754170f0284c59a3215d8dfcd92e48f3ae00
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/rng_state_9.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 27772
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:20f2b548b149282f75edcff3a569214186d9d16e83d775ade7543af31b397f99
|
3 |
size 27772
|
{checkpoint-5900 → checkpoint-6300}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 627
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fc486d593be71181a7b350459598105a51de879e181677c7b4a0bcff932974dc
|
3 |
size 627
|
{checkpoint-5900 → checkpoint-6300}/trainer_state.json
RENAMED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
-
"global_step":
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
@@ -1438,11 +1438,115 @@
|
|
1438 |
"learning_rate": 2.6618481296414522e-05,
|
1439 |
"loss": 0.7722,
|
1440 |
"step": 5900
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1441 |
}
|
1442 |
],
|
1443 |
"max_steps": 7737,
|
1444 |
"num_train_epochs": 3,
|
1445 |
-
"total_flos": 2.
|
1446 |
"trial_name": null,
|
1447 |
"trial_params": null
|
1448 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.44280728964715,
|
5 |
+
"global_step": 6300,
|
6 |
"is_hyper_param_search": false,
|
7 |
"is_local_process_zero": true,
|
8 |
"is_world_process_zero": true,
|
|
|
1438 |
"learning_rate": 2.6618481296414522e-05,
|
1439 |
"loss": 0.7722,
|
1440 |
"step": 5900
|
1441 |
+
},
|
1442 |
+
{
|
1443 |
+
"epoch": 2.3,
|
1444 |
+
"learning_rate": 2.5931770576738313e-05,
|
1445 |
+
"loss": 0.7698,
|
1446 |
+
"step": 5925
|
1447 |
+
},
|
1448 |
+
{
|
1449 |
+
"epoch": 2.31,
|
1450 |
+
"learning_rate": 2.525271205441837e-05,
|
1451 |
+
"loss": 0.7751,
|
1452 |
+
"step": 5950
|
1453 |
+
},
|
1454 |
+
{
|
1455 |
+
"epoch": 2.32,
|
1456 |
+
"learning_rate": 2.45813758849028e-05,
|
1457 |
+
"loss": 0.766,
|
1458 |
+
"step": 5975
|
1459 |
+
},
|
1460 |
+
{
|
1461 |
+
"epoch": 2.33,
|
1462 |
+
"learning_rate": 2.3917831425821824e-05,
|
1463 |
+
"loss": 0.7673,
|
1464 |
+
"step": 6000
|
1465 |
+
},
|
1466 |
+
{
|
1467 |
+
"epoch": 2.33,
|
1468 |
+
"eval_loss": 0.806900680065155,
|
1469 |
+
"eval_runtime": 58.3254,
|
1470 |
+
"eval_samples_per_second": 12.516,
|
1471 |
+
"eval_steps_per_second": 0.909,
|
1472 |
+
"step": 6000
|
1473 |
+
},
|
1474 |
+
{
|
1475 |
+
"epoch": 2.34,
|
1476 |
+
"learning_rate": 2.3262147229821984e-05,
|
1477 |
+
"loss": 0.7679,
|
1478 |
+
"step": 6025
|
1479 |
+
},
|
1480 |
+
{
|
1481 |
+
"epoch": 2.35,
|
1482 |
+
"learning_rate": 2.2614391037483983e-05,
|
1483 |
+
"loss": 0.7704,
|
1484 |
+
"step": 6050
|
1485 |
+
},
|
1486 |
+
{
|
1487 |
+
"epoch": 2.36,
|
1488 |
+
"learning_rate": 2.1974629770324106e-05,
|
1489 |
+
"loss": 0.7661,
|
1490 |
+
"step": 6075
|
1491 |
+
},
|
1492 |
+
{
|
1493 |
+
"epoch": 2.37,
|
1494 |
+
"learning_rate": 2.1342929523880416e-05,
|
1495 |
+
"loss": 0.7652,
|
1496 |
+
"step": 6100
|
1497 |
+
},
|
1498 |
+
{
|
1499 |
+
"epoch": 2.37,
|
1500 |
+
"learning_rate": 2.0719355560884246e-05,
|
1501 |
+
"loss": 0.765,
|
1502 |
+
"step": 6125
|
1503 |
+
},
|
1504 |
+
{
|
1505 |
+
"epoch": 2.38,
|
1506 |
+
"learning_rate": 2.010397230451766e-05,
|
1507 |
+
"loss": 0.7704,
|
1508 |
+
"step": 6150
|
1509 |
+
},
|
1510 |
+
{
|
1511 |
+
"epoch": 2.39,
|
1512 |
+
"learning_rate": 1.9496843331757784e-05,
|
1513 |
+
"loss": 0.767,
|
1514 |
+
"step": 6175
|
1515 |
+
},
|
1516 |
+
{
|
1517 |
+
"epoch": 2.4,
|
1518 |
+
"learning_rate": 1.8898031366808467e-05,
|
1519 |
+
"loss": 0.7654,
|
1520 |
+
"step": 6200
|
1521 |
+
},
|
1522 |
+
{
|
1523 |
+
"epoch": 2.41,
|
1524 |
+
"learning_rate": 1.830759827462004e-05,
|
1525 |
+
"loss": 0.7753,
|
1526 |
+
"step": 6225
|
1527 |
+
},
|
1528 |
+
{
|
1529 |
+
"epoch": 2.42,
|
1530 |
+
"learning_rate": 1.7725605054497906e-05,
|
1531 |
+
"loss": 0.7725,
|
1532 |
+
"step": 6250
|
1533 |
+
},
|
1534 |
+
{
|
1535 |
+
"epoch": 2.43,
|
1536 |
+
"learning_rate": 1.7152111833800522e-05,
|
1537 |
+
"loss": 0.7698,
|
1538 |
+
"step": 6275
|
1539 |
+
},
|
1540 |
+
{
|
1541 |
+
"epoch": 2.44,
|
1542 |
+
"learning_rate": 1.6587177861727454e-05,
|
1543 |
+
"loss": 0.7703,
|
1544 |
+
"step": 6300
|
1545 |
}
|
1546 |
],
|
1547 |
"max_steps": 7737,
|
1548 |
"num_train_epochs": 3,
|
1549 |
+
"total_flos": 2.7132330413479952e+19,
|
1550 |
"trial_name": null,
|
1551 |
"trial_params": null
|
1552 |
}
|
{checkpoint-5900 → checkpoint-6300}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4027
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e85009bca9623c846e630c294adb80ecbcd9e720da8da9f9ee5311b562908b91
|
3 |
size 4027
|