Training in progress, epoch 1, checkpoint
Browse files- checkpoint-3599/adapter_model.safetensors +1 -1
- checkpoint-3599/optimizer.pt +1 -1
- checkpoint-3599/rng_state_0.pth +1 -1
- checkpoint-3599/rng_state_1.pth +1 -1
- checkpoint-3599/rng_state_2.pth +1 -1
- checkpoint-3599/rng_state_3.pth +1 -1
- checkpoint-3599/trainer_state.json +36 -36
- checkpoint-3599/training_args.bin +1 -1
checkpoint-3599/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 541459256
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e80772e2eadbfaba74c7bd52ee5780fe403361d763193581d8c19cacf6f60f09
|
| 3 |
size 541459256
|
checkpoint-3599/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 33662074
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fda060a95f49b34613c5aca089766930ecff14dd3ff6e9c13192aed0351b0022
|
| 3 |
size 33662074
|
checkpoint-3599/rng_state_0.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ed9788f623dbedcffdd08e10e7e7c688263bd7e046f284da03c20e238150c564
|
| 3 |
size 15024
|
checkpoint-3599/rng_state_1.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e90539e2e1f01024a2fc0f2b33e6699e8fab3c3f42a8d71f4cd892272a95f29f
|
| 3 |
size 15024
|
checkpoint-3599/rng_state_2.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ad8910a4ffa0dc7251ac9e854005ca1e2bfaecbe884141af2869975e76d80c0
|
| 3 |
size 15024
|
checkpoint-3599/rng_state_3.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 15024
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5c1dd7389977a275bf118f5a82bc34870d5f7ffbe5bc19671403b0f80601a782
|
| 3 |
size 15024
|
checkpoint-3599/trainer_state.json
CHANGED
|
@@ -10,83 +10,83 @@
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13892747985551543,
|
| 13 |
-
"grad_norm": 0.
|
| 14 |
-
"learning_rate": 0.
|
| 15 |
-
"loss": 0.
|
| 16 |
"step": 500
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.2000555709919422,
|
| 20 |
-
"eval_loss": 0.
|
| 21 |
-
"eval_runtime": 16.
|
| 22 |
-
"eval_samples_per_second": 30.
|
| 23 |
-
"eval_steps_per_second": 3.
|
| 24 |
"step": 720
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.27785495971103086,
|
| 28 |
-
"grad_norm": 0.
|
| 29 |
-
"learning_rate": 0.
|
| 30 |
-
"loss": 0.
|
| 31 |
"step": 1000
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"epoch": 0.4001111419838844,
|
| 35 |
-
"eval_loss": 0.
|
| 36 |
-
"eval_runtime": 16.
|
| 37 |
-
"eval_samples_per_second": 30.
|
| 38 |
"eval_steps_per_second": 3.822,
|
| 39 |
"step": 1440
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"epoch": 0.41678243956654626,
|
| 43 |
-
"grad_norm": 0.
|
| 44 |
-
"learning_rate": 0.
|
| 45 |
-
"loss": 0.
|
| 46 |
"step": 1500
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 0.5557099194220617,
|
| 50 |
-
"grad_norm": 0.
|
| 51 |
-
"learning_rate": 0.
|
| 52 |
-
"loss": 0.
|
| 53 |
"step": 2000
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"epoch": 0.6001667129758266,
|
| 57 |
-
"eval_loss": 0.
|
| 58 |
-
"eval_runtime": 16.
|
| 59 |
-
"eval_samples_per_second": 30.
|
| 60 |
-
"eval_steps_per_second": 3.
|
| 61 |
"step": 2160
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"epoch": 0.6946373992775771,
|
| 65 |
-
"grad_norm": 0.
|
| 66 |
-
"learning_rate": 0.
|
| 67 |
-
"loss": 0.
|
| 68 |
"step": 2500
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"epoch": 0.8002222839677688,
|
| 72 |
-
"eval_loss": 0.
|
| 73 |
-
"eval_runtime": 16.
|
| 74 |
-
"eval_samples_per_second": 30.
|
| 75 |
-
"eval_steps_per_second": 3.
|
| 76 |
"step": 2880
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"epoch": 0.8335648791330925,
|
| 80 |
-
"grad_norm": 0.
|
| 81 |
-
"learning_rate": 0.
|
| 82 |
-
"loss": 0.
|
| 83 |
"step": 3000
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"epoch": 0.972492358988608,
|
| 87 |
-
"grad_norm": 0.
|
| 88 |
"learning_rate": 0.0003676113735296842,
|
| 89 |
-
"loss": 0.
|
| 90 |
"step": 3500
|
| 91 |
}
|
| 92 |
],
|
|
@@ -107,7 +107,7 @@
|
|
| 107 |
"attributes": {}
|
| 108 |
}
|
| 109 |
},
|
| 110 |
-
"total_flos": 6.
|
| 111 |
"train_batch_size": 4,
|
| 112 |
"trial_name": null,
|
| 113 |
"trial_params": null
|
|
|
|
| 10 |
"log_history": [
|
| 11 |
{
|
| 12 |
"epoch": 0.13892747985551543,
|
| 13 |
+
"grad_norm": 0.4299773573875427,
|
| 14 |
+
"learning_rate": 0.0003953968695007873,
|
| 15 |
+
"loss": 0.2967,
|
| 16 |
"step": 500
|
| 17 |
},
|
| 18 |
{
|
| 19 |
"epoch": 0.2000555709919422,
|
| 20 |
+
"eval_loss": 0.1983761489391327,
|
| 21 |
+
"eval_runtime": 16.4512,
|
| 22 |
+
"eval_samples_per_second": 30.393,
|
| 23 |
+
"eval_steps_per_second": 3.83,
|
| 24 |
"step": 720
|
| 25 |
},
|
| 26 |
{
|
| 27 |
"epoch": 0.27785495971103086,
|
| 28 |
+
"grad_norm": 0.5706949830055237,
|
| 29 |
+
"learning_rate": 0.0003907659535056034,
|
| 30 |
+
"loss": 0.1959,
|
| 31 |
"step": 1000
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"epoch": 0.4001111419838844,
|
| 35 |
+
"eval_loss": 0.17383727431297302,
|
| 36 |
+
"eval_runtime": 16.4842,
|
| 37 |
+
"eval_samples_per_second": 30.332,
|
| 38 |
"eval_steps_per_second": 3.822,
|
| 39 |
"step": 1440
|
| 40 |
},
|
| 41 |
{
|
| 42 |
"epoch": 0.41678243956654626,
|
| 43 |
+
"grad_norm": 0.5392869710922241,
|
| 44 |
+
"learning_rate": 0.0003861350375104196,
|
| 45 |
+
"loss": 0.1723,
|
| 46 |
"step": 1500
|
| 47 |
},
|
| 48 |
{
|
| 49 |
"epoch": 0.5557099194220617,
|
| 50 |
+
"grad_norm": 0.5005412697792053,
|
| 51 |
+
"learning_rate": 0.0003815041215152357,
|
| 52 |
+
"loss": 0.1636,
|
| 53 |
"step": 2000
|
| 54 |
},
|
| 55 |
{
|
| 56 |
"epoch": 0.6001667129758266,
|
| 57 |
+
"eval_loss": 0.16229495406150818,
|
| 58 |
+
"eval_runtime": 16.4963,
|
| 59 |
+
"eval_samples_per_second": 30.31,
|
| 60 |
+
"eval_steps_per_second": 3.819,
|
| 61 |
"step": 2160
|
| 62 |
},
|
| 63 |
{
|
| 64 |
"epoch": 0.6946373992775771,
|
| 65 |
+
"grad_norm": 0.370914489030838,
|
| 66 |
+
"learning_rate": 0.0003768732055200519,
|
| 67 |
+
"loss": 0.1539,
|
| 68 |
"step": 2500
|
| 69 |
},
|
| 70 |
{
|
| 71 |
"epoch": 0.8002222839677688,
|
| 72 |
+
"eval_loss": 0.1537286937236786,
|
| 73 |
+
"eval_runtime": 16.4662,
|
| 74 |
+
"eval_samples_per_second": 30.365,
|
| 75 |
+
"eval_steps_per_second": 3.826,
|
| 76 |
"step": 2880
|
| 77 |
},
|
| 78 |
{
|
| 79 |
"epoch": 0.8335648791330925,
|
| 80 |
+
"grad_norm": 0.41378697752952576,
|
| 81 |
+
"learning_rate": 0.000372242289524868,
|
| 82 |
+
"loss": 0.1445,
|
| 83 |
"step": 3000
|
| 84 |
},
|
| 85 |
{
|
| 86 |
"epoch": 0.972492358988608,
|
| 87 |
+
"grad_norm": 0.4000154137611389,
|
| 88 |
"learning_rate": 0.0003676113735296842,
|
| 89 |
+
"loss": 0.1384,
|
| 90 |
"step": 3500
|
| 91 |
}
|
| 92 |
],
|
|
|
|
| 107 |
"attributes": {}
|
| 108 |
}
|
| 109 |
},
|
| 110 |
+
"total_flos": 6.778569565551985e+17,
|
| 111 |
"train_batch_size": 4,
|
| 112 |
"trial_name": null,
|
| 113 |
"trial_params": null
|
checkpoint-3599/training_args.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 5496
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2784cc9ebb113293b9d7c5af564dbf5463d67b520bf149c8840105fec4706ec
|
| 3 |
size 5496
|