Training in progress, step 24500, checkpoint
Browse files
last-checkpoint/model-00001-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 4978139416
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:562e4a9f15032cff37840fc99c24b36c2938f6824c4ca58590bc282b9eceefae
|
3 |
size 4978139416
|
last-checkpoint/model-00002-of-00002.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3659223436
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:efa372ea2585805112312160143a4a2b5f3a5057ba30e3e0eeff9dd31782e88f
|
3 |
size 3659223436
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 17241500333
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:28cf928a2adfc5e5cc511e1fa21a7d847528c68010f1f44667b1fe6fd9def055
|
3 |
size 17241500333
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eff48e968f7fe23fe47e4973cf98e3ccc5305e7ef8b406ef287a82f2825196c0
|
3 |
+
size 14567
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 623
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:006849dff6df46bbddfe5a9b04e68547e9c816ca6762fb11abd57f00f2fd9546
|
3 |
size 623
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 10.
|
5 |
"eval_steps": 100,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -3863,6 +3863,70 @@
|
|
3863 |
"eval_samples_per_second": 25.751,
|
3864 |
"eval_steps_per_second": 3.22,
|
3865 |
"step": 24100
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
3866 |
}
|
3867 |
],
|
3868 |
"logging_steps": 100,
|
@@ -3870,7 +3934,7 @@
|
|
3870 |
"num_input_tokens_seen": 0,
|
3871 |
"num_train_epochs": 30,
|
3872 |
"save_steps": 100,
|
3873 |
-
"total_flos": 2.
|
3874 |
"train_batch_size": 8,
|
3875 |
"trial_name": null,
|
3876 |
"trial_params": null
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 10.313618185645128,
|
5 |
"eval_steps": 100,
|
6 |
+
"global_step": 24500,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
3863 |
"eval_samples_per_second": 25.751,
|
3864 |
"eval_steps_per_second": 3.22,
|
3865 |
"step": 24100
|
3866 |
+
},
|
3867 |
+
{
|
3868 |
+
"epoch": 10.19,
|
3869 |
+
"grad_norm": 2.6913535594940186,
|
3870 |
+
"learning_rate": 3.3260777385159016e-05,
|
3871 |
+
"loss": 1.2071,
|
3872 |
+
"step": 24200
|
3873 |
+
},
|
3874 |
+
{
|
3875 |
+
"epoch": 10.19,
|
3876 |
+
"eval_cer": 0.43098382933792184,
|
3877 |
+
"eval_loss": 1.9375296831130981,
|
3878 |
+
"eval_runtime": 388.5754,
|
3879 |
+
"eval_samples_per_second": 24.392,
|
3880 |
+
"eval_steps_per_second": 3.05,
|
3881 |
+
"step": 24200
|
3882 |
+
},
|
3883 |
+
{
|
3884 |
+
"epoch": 10.23,
|
3885 |
+
"grad_norm": 2.8861985206604004,
|
3886 |
+
"learning_rate": 3.319010600706714e-05,
|
3887 |
+
"loss": 1.2745,
|
3888 |
+
"step": 24300
|
3889 |
+
},
|
3890 |
+
{
|
3891 |
+
"epoch": 10.23,
|
3892 |
+
"eval_cer": 0.43681563098822884,
|
3893 |
+
"eval_loss": 1.8604010343551636,
|
3894 |
+
"eval_runtime": 365.4924,
|
3895 |
+
"eval_samples_per_second": 25.932,
|
3896 |
+
"eval_steps_per_second": 3.242,
|
3897 |
+
"step": 24300
|
3898 |
+
},
|
3899 |
+
{
|
3900 |
+
"epoch": 10.27,
|
3901 |
+
"grad_norm": 3.54598069190979,
|
3902 |
+
"learning_rate": 3.3119434628975265e-05,
|
3903 |
+
"loss": 1.3486,
|
3904 |
+
"step": 24400
|
3905 |
+
},
|
3906 |
+
{
|
3907 |
+
"epoch": 10.27,
|
3908 |
+
"eval_cer": 0.4357133080442689,
|
3909 |
+
"eval_loss": 1.3347864151000977,
|
3910 |
+
"eval_runtime": 380.5736,
|
3911 |
+
"eval_samples_per_second": 24.905,
|
3912 |
+
"eval_steps_per_second": 3.114,
|
3913 |
+
"step": 24400
|
3914 |
+
},
|
3915 |
+
{
|
3916 |
+
"epoch": 10.31,
|
3917 |
+
"grad_norm": 2.1198437213897705,
|
3918 |
+
"learning_rate": 3.304876325088339e-05,
|
3919 |
+
"loss": 1.1866,
|
3920 |
+
"step": 24500
|
3921 |
+
},
|
3922 |
+
{
|
3923 |
+
"epoch": 10.31,
|
3924 |
+
"eval_cer": 0.4299792733956435,
|
3925 |
+
"eval_loss": 1.308254361152649,
|
3926 |
+
"eval_runtime": 368.9525,
|
3927 |
+
"eval_samples_per_second": 25.689,
|
3928 |
+
"eval_steps_per_second": 3.212,
|
3929 |
+
"step": 24500
|
3930 |
}
|
3931 |
],
|
3932 |
"logging_steps": 100,
|
|
|
3934 |
"num_input_tokens_seen": 0,
|
3935 |
"num_train_epochs": 30,
|
3936 |
"save_steps": 100,
|
3937 |
+
"total_flos": 2.6839589636223064e+20,
|
3938 |
"train_batch_size": 8,
|
3939 |
"trial_name": null,
|
3940 |
"trial_params": null
|