Training in progress, step 22500, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 36730224
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fc3712b1e057a16f372207b456fdeffd0c4bccabecebba6df60fbbd6e85f587
|
| 3 |
size 36730224
|
last-checkpoint/ar_diffusion_info.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1544
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8c31ca9ad2774f0cf15399d81f8dcf836e50e2ce585665a25f067e60f94bcf7
|
| 3 |
size 1544
|
last-checkpoint/optimizer.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 73588346
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b3d3278645373912f898a8fff510e120f19840077b176c8f23069af8f87b472e
|
| 3 |
size 73588346
|
last-checkpoint/rng_state.pth
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 14244
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0edeec917b285b0e88139f206de61b3e301e9b677aa51d87d86f36aba6dd8e7e
|
| 3 |
size 14244
|
last-checkpoint/scaler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 988
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1c79688ba387f89ea391b599b05911ed6d3d3bf9d86de9e8c20288888353c0b1
|
| 3 |
size 988
|
last-checkpoint/scheduler.pt
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 1064
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:605e1a7f916ea8b232a9d9961b8fcd6fb54050ae6ec5fb93bccd0252261b9e7d
|
| 3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
|
@@ -1,10 +1,10 @@
|
|
| 1 |
{
|
| 2 |
-
"best_global_step":
|
| 3 |
-
"best_metric": 1.
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-21000",
|
| 5 |
-
"epoch": 1.
|
| 6 |
"eval_steps": 250,
|
| 7 |
-
"global_step":
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
@@ -3792,6 +3792,92 @@
|
|
| 3792 |
"eval_samples_per_second": 56.27,
|
| 3793 |
"eval_steps_per_second": 14.067,
|
| 3794 |
"step": 22000
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3795 |
}
|
| 3796 |
],
|
| 3797 |
"logging_steps": 50,
|
|
|
|
| 1 |
{
|
| 2 |
+
"best_global_step": 22250,
|
| 3 |
+
"best_metric": 1.4596961736679077,
|
| 4 |
"best_model_checkpoint": "./ar-diffusion-checkpoints-fixed/checkpoint-21000",
|
| 5 |
+
"epoch": 1.7306361049150065,
|
| 6 |
"eval_steps": 250,
|
| 7 |
+
"global_step": 22500,
|
| 8 |
"is_hyper_param_search": false,
|
| 9 |
"is_local_process_zero": true,
|
| 10 |
"is_world_process_zero": true,
|
|
|
|
| 3792 |
"eval_samples_per_second": 56.27,
|
| 3793 |
"eval_steps_per_second": 14.067,
|
| 3794 |
"step": 22000
|
| 3795 |
+
},
|
| 3796 |
+
{
|
| 3797 |
+
"epoch": 1.6960233828167064,
|
| 3798 |
+
"grad_norm": 0.8225556015968323,
|
| 3799 |
+
"learning_rate": 8.81230034023323e-05,
|
| 3800 |
+
"loss": 1.4372,
|
| 3801 |
+
"step": 22050
|
| 3802 |
+
},
|
| 3803 |
+
{
|
| 3804 |
+
"epoch": 1.6998692408276286,
|
| 3805 |
+
"grad_norm": 1.495335578918457,
|
| 3806 |
+
"learning_rate": 8.78632833805158e-05,
|
| 3807 |
+
"loss": 1.4231,
|
| 3808 |
+
"step": 22100
|
| 3809 |
+
},
|
| 3810 |
+
{
|
| 3811 |
+
"epoch": 1.703715098838551,
|
| 3812 |
+
"grad_norm": 1.6913652420043945,
|
| 3813 |
+
"learning_rate": 8.760356335869933e-05,
|
| 3814 |
+
"loss": 1.4859,
|
| 3815 |
+
"step": 22150
|
| 3816 |
+
},
|
| 3817 |
+
{
|
| 3818 |
+
"epoch": 1.707560956849473,
|
| 3819 |
+
"grad_norm": 1.9825598001480103,
|
| 3820 |
+
"learning_rate": 8.734384333688285e-05,
|
| 3821 |
+
"loss": 1.4354,
|
| 3822 |
+
"step": 22200
|
| 3823 |
+
},
|
| 3824 |
+
{
|
| 3825 |
+
"epoch": 1.7114068148603954,
|
| 3826 |
+
"grad_norm": 2.212759017944336,
|
| 3827 |
+
"learning_rate": 8.708412331506635e-05,
|
| 3828 |
+
"loss": 1.3861,
|
| 3829 |
+
"step": 22250
|
| 3830 |
+
},
|
| 3831 |
+
{
|
| 3832 |
+
"epoch": 1.7114068148603954,
|
| 3833 |
+
"eval_loss": 1.4596961736679077,
|
| 3834 |
+
"eval_runtime": 17.8176,
|
| 3835 |
+
"eval_samples_per_second": 56.124,
|
| 3836 |
+
"eval_steps_per_second": 14.031,
|
| 3837 |
+
"step": 22250
|
| 3838 |
+
},
|
| 3839 |
+
{
|
| 3840 |
+
"epoch": 1.7152526728713176,
|
| 3841 |
+
"grad_norm": 1.4325975179672241,
|
| 3842 |
+
"learning_rate": 8.682440329324988e-05,
|
| 3843 |
+
"loss": 1.4805,
|
| 3844 |
+
"step": 22300
|
| 3845 |
+
},
|
| 3846 |
+
{
|
| 3847 |
+
"epoch": 1.7190985308822397,
|
| 3848 |
+
"grad_norm": 1.9796292781829834,
|
| 3849 |
+
"learning_rate": 8.65646832714334e-05,
|
| 3850 |
+
"loss": 1.3836,
|
| 3851 |
+
"step": 22350
|
| 3852 |
+
},
|
| 3853 |
+
{
|
| 3854 |
+
"epoch": 1.7229443888931621,
|
| 3855 |
+
"grad_norm": 1.6221562623977661,
|
| 3856 |
+
"learning_rate": 8.630496324961692e-05,
|
| 3857 |
+
"loss": 1.4176,
|
| 3858 |
+
"step": 22400
|
| 3859 |
+
},
|
| 3860 |
+
{
|
| 3861 |
+
"epoch": 1.7267902469040843,
|
| 3862 |
+
"grad_norm": 1.1431959867477417,
|
| 3863 |
+
"learning_rate": 8.604524322780043e-05,
|
| 3864 |
+
"loss": 1.4553,
|
| 3865 |
+
"step": 22450
|
| 3866 |
+
},
|
| 3867 |
+
{
|
| 3868 |
+
"epoch": 1.7306361049150065,
|
| 3869 |
+
"grad_norm": 1.1562083959579468,
|
| 3870 |
+
"learning_rate": 8.578552320598396e-05,
|
| 3871 |
+
"loss": 1.4489,
|
| 3872 |
+
"step": 22500
|
| 3873 |
+
},
|
| 3874 |
+
{
|
| 3875 |
+
"epoch": 1.7306361049150065,
|
| 3876 |
+
"eval_loss": 1.464021921157837,
|
| 3877 |
+
"eval_runtime": 17.8166,
|
| 3878 |
+
"eval_samples_per_second": 56.127,
|
| 3879 |
+
"eval_steps_per_second": 14.032,
|
| 3880 |
+
"step": 22500
|
| 3881 |
}
|
| 3882 |
],
|
| 3883 |
"logging_steps": 50,
|