diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a092508f49b7debf23eb22091bdac4ac1daa62a9 --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,20 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "../Baichuan-13B-Chat", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "W_pack" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/adapter_model.bin b/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..afe8dd0e24ff0670b8fb8896b51f69b745d09baa --- /dev/null +++ b/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:61e733593d7d6556df0b1ae9d46c088a85a0cb9e374390c97a250dd4985e332a +size 26241825 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89cd4d5475ee0b51acb980fd4c9619ba51d6b225 --- /dev/null +++ b/all_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 2.0, + "train_loss": 1.4147593358881598, + "train_runtime": 86303.1376, + "train_samples_per_second": 7.507, + "train_steps_per_second": 0.078 +} \ No newline at end of file diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a092508f49b7debf23eb22091bdac4ac1daa62a9 --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,20 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "../Baichuan-13B-Chat", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "W_pack" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.bin b/checkpoint-1000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..433e83739680a37c826fc1ef71103f4d83ea9637 --- /dev/null +++ b/checkpoint-1000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea16ca709f8e0d8d2ebc770b5437b8204ed833d99ab6960894435356cc03064 +size 26241825 diff --git a/checkpoint-1000/finetuning_args.json b/checkpoint-1000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..fafc24bcd05e0bda0201b5a7198b067dab53f435 --- /dev/null +++ b/checkpoint-1000/finetuning_args.json @@ -0,0 +1,12 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "W_pack" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..55b39b53e08bed7a776faf093cf984ad191f75e1 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7068fb198fe41a6dd7e07551bf91279906e39e413e8664433b4868b479335c12 +size 52496005 diff --git a/checkpoint-1000/rng_state_0.pth b/checkpoint-1000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..bfab17495c3cef4ce442938138e58f44b27a56cf --- /dev/null +++ b/checkpoint-1000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e47a25cfc194b1b83cf419f99c030a5e3ab3d1fd81ab1dfac1fcb693c12c5885 +size 18679 diff --git a/checkpoint-1000/rng_state_1.pth b/checkpoint-1000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..0ef7ecf5487bbcf750f1037a520dafb3b7ae0ead --- /dev/null +++ b/checkpoint-1000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9e136123e8ca2ed31622d97a27a3cdb90ac955dd45d3458d6548b568edf43fa +size 18679 diff --git a/checkpoint-1000/rng_state_2.pth b/checkpoint-1000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..7be3772ddcd4893f7ac25b58fc669f6b0d34ca89 --- /dev/null +++ b/checkpoint-1000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2c50c50859ca908045c331dfa3e0c222c04ffe6c2da09b427dd6cc5dab2ecb0 +size 18679 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1e9d10642e7c1d458f00fc19988e6deac23cd94 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ab40d1a86eccca3aa74fea1b90d31aaa3a10f5b9ee433d980eeb36a502cd1eed +size 627 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..4c6d040145964f257e6fefd4a25a7549e1ca6a2c --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.29631824579598487, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999729068921297e-05, + "loss": 1.8898, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998916281557476e-05, + "loss": 1.7273, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999756165552527e-05, + "loss": 1.6799, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566522018553e-05, + "loss": 1.6431, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322701664249e-05, + "loss": 1.6153, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990247097742984e-05, + "loss": 1.5933, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.9986725528075205e-05, + "loss": 1.5913, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.998266238396737e-05, + "loss": 1.5434, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 4.997805775348605e-05, + "loss": 1.5304, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 4.997291173643424e-05, + "loss": 1.5531, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 4.996722444434921e-05, + "loss": 1.5446, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 4.99609960005001e-05, + "loss": 1.5352, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 4.995422653988524e-05, + "loss": 1.5303, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 4.994691620922919e-05, + "loss": 1.5449, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 4.993906516697964e-05, + "loss": 1.5114, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9930673583303865e-05, + "loss": 1.5043, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 4.992174164008515e-05, + "loss": 1.5476, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 4.991226953091877e-05, + "loss": 1.5107, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 4.9902257461107824e-05, + "loss": 1.5104, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 4.9891705647658795e-05, + "loss": 1.5298, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 4.988061431927681e-05, + "loss": 1.4907, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 4.986898371636071e-05, + "loss": 1.5127, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 4.985681409099784e-05, + "loss": 1.5037, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 4.984410570695858e-05, + "loss": 1.5029, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 4.983085883969063e-05, + "loss": 1.4725, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 4.981707377631303e-05, + "loss": 1.5148, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 4.9802750815609936e-05, + "loss": 1.4993, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 4.978789026802419e-05, + "loss": 1.5006, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 4.9772492455650494e-05, + "loss": 1.4885, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 4.975655771222855e-05, + "loss": 1.4898, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 4.9740086383135706e-05, + "loss": 1.4906, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 4.97230788253796e-05, + "loss": 1.4796, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 4.970553540759028e-05, + "loss": 1.4861, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 4.968745651001231e-05, + "loss": 1.4827, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9668842524496526e-05, + "loss": 1.4884, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 4.964969385449149e-05, + "loss": 1.4873, + "step": 360 + }, + { + "epoch": 0.11, + "learning_rate": 4.96300109150348e-05, + "loss": 1.4848, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 4.960979413274404e-05, + "loss": 1.4881, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 4.9589043945807594e-05, + "loss": 1.4618, + "step": 390 + }, + { + "epoch": 0.12, + "learning_rate": 4.9567760803975105e-05, + "loss": 1.4858, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 4.954594516854773e-05, + "loss": 1.4777, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 4.952359751236817e-05, + "loss": 1.4828, + "step": 420 + }, + { + "epoch": 0.13, + "learning_rate": 4.950071831981038e-05, + "loss": 1.4571, + "step": 430 + }, + { + "epoch": 0.13, + "learning_rate": 4.9477308086769117e-05, + "loss": 1.4724, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 4.945336732064915e-05, + "loss": 1.4771, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 4.9428896540354294e-05, + "loss": 1.4604, + "step": 460 + }, + { + "epoch": 0.14, + "learning_rate": 4.940389627627613e-05, + "loss": 1.4815, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 4.937836707028255e-05, + "loss": 1.4859, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 4.935230947570597e-05, + "loss": 1.4715, + "step": 490 + }, + { + "epoch": 0.15, + "learning_rate": 4.932572405733137e-05, + "loss": 1.4759, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 4.929861139138404e-05, + "loss": 1.4678, + "step": 510 + }, + { + "epoch": 0.15, + "learning_rate": 4.9270972065517083e-05, + "loss": 1.4754, + "step": 520 + }, + { + "epoch": 0.16, + "learning_rate": 4.924280667879869e-05, + "loss": 1.462, + "step": 530 + }, + { + "epoch": 0.16, + "learning_rate": 4.921411584169915e-05, + "loss": 1.4704, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 4.918490017607761e-05, + "loss": 1.4661, + "step": 550 + }, + { + "epoch": 0.17, + "learning_rate": 4.915516031516863e-05, + "loss": 1.471, + "step": 560 + }, + { + "epoch": 0.17, + "learning_rate": 4.912489690356841e-05, + "loss": 1.451, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 4.909411059722084e-05, + "loss": 1.4411, + "step": 580 + }, + { + "epoch": 0.17, + "learning_rate": 4.9062802063403316e-05, + "loss": 1.456, + "step": 590 + }, + { + "epoch": 0.18, + "learning_rate": 4.90309719807122e-05, + "loss": 1.4678, + "step": 600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8998621039048205e-05, + "loss": 1.479, + "step": 610 + }, + { + "epoch": 0.18, + "learning_rate": 4.896574993960136e-05, + "loss": 1.4471, + "step": 620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893235939483587e-05, + "loss": 1.453, + "step": 630 + }, + { + "epoch": 0.19, + "learning_rate": 4.8898450128474626e-05, + "loss": 1.4696, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 4.886402287548357e-05, + "loss": 1.4526, + "step": 650 + }, + { + "epoch": 0.2, + "learning_rate": 4.8829078382055725e-05, + "loss": 1.4429, + "step": 660 + }, + { + "epoch": 0.2, + "learning_rate": 4.8793617405595025e-05, + "loss": 1.4491, + "step": 670 + }, + { + "epoch": 0.2, + "learning_rate": 4.8757640714699924e-05, + "loss": 1.4411, + "step": 680 + }, + { + "epoch": 0.2, + "learning_rate": 4.872114908914671e-05, + "loss": 1.4543, + "step": 690 + }, + { + "epoch": 0.21, + "learning_rate": 4.8684143319872636e-05, + "loss": 1.4556, + "step": 700 + }, + { + "epoch": 0.21, + "learning_rate": 4.864662420895873e-05, + "loss": 1.4506, + "step": 710 + }, + { + "epoch": 0.21, + "learning_rate": 4.860859256961244e-05, + "loss": 1.4671, + "step": 720 + }, + { + "epoch": 0.22, + "learning_rate": 4.857004922615002e-05, + "loss": 1.4469, + "step": 730 + }, + { + "epoch": 0.22, + "learning_rate": 4.8530995013978645e-05, + "loss": 1.4554, + "step": 740 + }, + { + "epoch": 0.22, + "learning_rate": 4.84914307795783e-05, + "loss": 1.4671, + "step": 750 + }, + { + "epoch": 0.23, + "learning_rate": 4.845135738048343e-05, + "loss": 1.445, + "step": 760 + }, + { + "epoch": 0.23, + "learning_rate": 4.841077568526439e-05, + "loss": 1.4469, + "step": 770 + }, + { + "epoch": 0.23, + "learning_rate": 4.836968657350857e-05, + "loss": 1.4677, + "step": 780 + }, + { + "epoch": 0.23, + "learning_rate": 4.832809093580135e-05, + "loss": 1.4653, + "step": 790 + }, + { + "epoch": 0.24, + "learning_rate": 4.8285989673706826e-05, + "loss": 1.4342, + "step": 800 + }, + { + "epoch": 0.24, + "learning_rate": 4.824338369974822e-05, + "loss": 1.458, + "step": 810 + }, + { + "epoch": 0.24, + "learning_rate": 4.8200273937388126e-05, + "loss": 1.4541, + "step": 820 + }, + { + "epoch": 0.25, + "learning_rate": 4.81566613210085e-05, + "loss": 1.4324, + "step": 830 + }, + { + "epoch": 0.25, + "learning_rate": 4.81125467958904e-05, + "loss": 1.4405, + "step": 840 + }, + { + "epoch": 0.25, + "learning_rate": 4.80679313181935e-05, + "loss": 1.4408, + "step": 850 + }, + { + "epoch": 0.25, + "learning_rate": 4.8022815854935356e-05, + "loss": 1.4395, + "step": 860 + }, + { + "epoch": 0.26, + "learning_rate": 4.797720138397045e-05, + "loss": 1.4359, + "step": 870 + }, + { + "epoch": 0.26, + "learning_rate": 4.793108889396902e-05, + "loss": 1.442, + "step": 880 + }, + { + "epoch": 0.26, + "learning_rate": 4.7884479384395594e-05, + "loss": 1.4566, + "step": 890 + }, + { + "epoch": 0.27, + "learning_rate": 4.7837373865487345e-05, + "loss": 1.4257, + "step": 900 + }, + { + "epoch": 0.27, + "learning_rate": 4.77897733582322e-05, + "loss": 1.4755, + "step": 910 + }, + { + "epoch": 0.27, + "learning_rate": 4.774167889434671e-05, + "loss": 1.4476, + "step": 920 + }, + { + "epoch": 0.28, + "learning_rate": 4.769309151625366e-05, + "loss": 1.4531, + "step": 930 + }, + { + "epoch": 0.28, + "learning_rate": 4.7644012277059516e-05, + "loss": 1.447, + "step": 940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7594442240531574e-05, + "loss": 1.4201, + "step": 950 + }, + { + "epoch": 0.28, + "learning_rate": 4.754438248107491e-05, + "loss": 1.4323, + "step": 960 + }, + { + "epoch": 0.29, + "learning_rate": 4.7493834083709104e-05, + "loss": 1.4432, + "step": 970 + }, + { + "epoch": 0.29, + "learning_rate": 4.7442798144044695e-05, + "loss": 1.4339, + "step": 980 + }, + { + "epoch": 0.29, + "learning_rate": 4.739127576825945e-05, + "loss": 1.4477, + "step": 990 + }, + { + "epoch": 0.3, + "learning_rate": 4.733926807307441e-05, + "loss": 1.4242, + "step": 1000 + } + ], + "max_steps": 6748, + "num_train_epochs": 2, + "total_flos": 1.731069029080105e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..406ab5b628f223bfcd63d70185fb1bc0973e19c4 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77681af64e1f04ae2b28b063de632629c209cd2338ce2449c3e014f309b6088a +size 3298 diff --git a/checkpoint-2000/README.md b/checkpoint-2000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-2000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-2000/adapter_config.json b/checkpoint-2000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a092508f49b7debf23eb22091bdac4ac1daa62a9 --- /dev/null +++ b/checkpoint-2000/adapter_config.json @@ -0,0 +1,20 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "../Baichuan-13B-Chat", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "W_pack" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2000/adapter_model.bin b/checkpoint-2000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..736af1f9cf296e5939e24979e979b9711b00337a --- /dev/null +++ b/checkpoint-2000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9db146188f1c29cb5b47bb5f2871d5cd38afd2ddd9262f017bcff865bd5d8eb2 +size 26241825 diff --git a/checkpoint-2000/finetuning_args.json b/checkpoint-2000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..fafc24bcd05e0bda0201b5a7198b067dab53f435 --- /dev/null +++ b/checkpoint-2000/finetuning_args.json @@ -0,0 +1,12 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "W_pack" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..44f7f51495bc642bbc9ccb95b038ac305f5046cd --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8af3f668928a3787eeb0cee4505fe8112e8a56a650a96edb95d6fd1d63efb385 +size 52496005 diff --git a/checkpoint-2000/rng_state_0.pth b/checkpoint-2000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..fd01dab4c042b8d0162f2ee76bca088e737f19b3 --- /dev/null +++ b/checkpoint-2000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3d1fbeb8be97fe1e93610c351c57d2b4f1912d3cce8014b3939ce86938b9720b +size 18679 diff --git a/checkpoint-2000/rng_state_1.pth b/checkpoint-2000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..1ca9977ef2debee468af0d25c465038240b89f96 --- /dev/null +++ b/checkpoint-2000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7bdc9f85e2c0b2ce3549fce0143b02b51f54c98dd43f0aa027d304f8e1baffcd +size 18679 diff --git a/checkpoint-2000/rng_state_2.pth b/checkpoint-2000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..c4f5a2aa2c7f5efd8775d491a8504c065f1da813 --- /dev/null +++ b/checkpoint-2000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba7245164663e5fce334c9c73d0691c8eef29308e1cda552389421bc4ba34eba +size 18679 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..aadd49a9aa6976875f559215dcda2ef34360a7ee --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6921a5bc31311d5eab2c7a96c842b94c73dd0de7cf0af0f4565e92eabf60f29 +size 627 diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..07a5442ba7176f4709033e86c4e98ddc4323b2e9 --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,1216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5926364915919697, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999729068921297e-05, + "loss": 1.8898, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998916281557476e-05, + "loss": 1.7273, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999756165552527e-05, + "loss": 1.6799, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566522018553e-05, + "loss": 1.6431, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322701664249e-05, + "loss": 1.6153, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990247097742984e-05, + "loss": 1.5933, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.9986725528075205e-05, + "loss": 1.5913, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.998266238396737e-05, + "loss": 1.5434, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 4.997805775348605e-05, + "loss": 1.5304, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 4.997291173643424e-05, + "loss": 1.5531, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 4.996722444434921e-05, + "loss": 1.5446, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 4.99609960005001e-05, + "loss": 1.5352, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 4.995422653988524e-05, + "loss": 1.5303, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 4.994691620922919e-05, + "loss": 1.5449, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 4.993906516697964e-05, + "loss": 1.5114, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9930673583303865e-05, + "loss": 1.5043, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 4.992174164008515e-05, + "loss": 1.5476, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 4.991226953091877e-05, + "loss": 1.5107, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 4.9902257461107824e-05, + "loss": 1.5104, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 4.9891705647658795e-05, + "loss": 1.5298, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 4.988061431927681e-05, + "loss": 1.4907, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 4.986898371636071e-05, + "loss": 1.5127, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 4.985681409099784e-05, + "loss": 1.5037, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 4.984410570695858e-05, + "loss": 1.5029, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 4.983085883969063e-05, + "loss": 1.4725, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 4.981707377631303e-05, + "loss": 1.5148, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 4.9802750815609936e-05, + "loss": 1.4993, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 4.978789026802419e-05, + "loss": 1.5006, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 4.9772492455650494e-05, + "loss": 1.4885, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 4.975655771222855e-05, + "loss": 1.4898, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 4.9740086383135706e-05, + "loss": 1.4906, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 4.97230788253796e-05, + "loss": 1.4796, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 4.970553540759028e-05, + "loss": 1.4861, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 4.968745651001231e-05, + "loss": 1.4827, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9668842524496526e-05, + "loss": 1.4884, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 4.964969385449149e-05, + "loss": 1.4873, + "step": 360 + }, + { + "epoch": 0.11, + "learning_rate": 4.96300109150348e-05, + "loss": 1.4848, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 4.960979413274404e-05, + "loss": 1.4881, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 4.9589043945807594e-05, + "loss": 1.4618, + "step": 390 + }, + { + "epoch": 0.12, + "learning_rate": 4.9567760803975105e-05, + "loss": 1.4858, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 4.954594516854773e-05, + "loss": 1.4777, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 4.952359751236817e-05, + "loss": 1.4828, + "step": 420 + }, + { + "epoch": 0.13, + "learning_rate": 4.950071831981038e-05, + "loss": 1.4571, + "step": 430 + }, + { + "epoch": 0.13, + "learning_rate": 4.9477308086769117e-05, + "loss": 1.4724, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 4.945336732064915e-05, + "loss": 1.4771, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 4.9428896540354294e-05, + "loss": 1.4604, + "step": 460 + }, + { + "epoch": 0.14, + "learning_rate": 4.940389627627613e-05, + "loss": 1.4815, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 4.937836707028255e-05, + "loss": 1.4859, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 4.935230947570597e-05, + "loss": 1.4715, + "step": 490 + }, + { + "epoch": 0.15, + "learning_rate": 4.932572405733137e-05, + "loss": 1.4759, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 4.929861139138404e-05, + "loss": 1.4678, + "step": 510 + }, + { + "epoch": 0.15, + "learning_rate": 4.9270972065517083e-05, + "loss": 1.4754, + "step": 520 + }, + { + "epoch": 0.16, + "learning_rate": 4.924280667879869e-05, + "loss": 1.462, + "step": 530 + }, + { + "epoch": 0.16, + "learning_rate": 4.921411584169915e-05, + "loss": 1.4704, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 4.918490017607761e-05, + "loss": 1.4661, + "step": 550 + }, + { + "epoch": 0.17, + "learning_rate": 4.915516031516863e-05, + "loss": 1.471, + "step": 560 + }, + { + "epoch": 0.17, + "learning_rate": 4.912489690356841e-05, + "loss": 1.451, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 4.909411059722084e-05, + "loss": 1.4411, + "step": 580 + }, + { + "epoch": 0.17, + "learning_rate": 4.9062802063403316e-05, + "loss": 1.456, + "step": 590 + }, + { + "epoch": 0.18, + "learning_rate": 4.90309719807122e-05, + "loss": 1.4678, + "step": 600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8998621039048205e-05, + "loss": 1.479, + "step": 610 + }, + { + "epoch": 0.18, + "learning_rate": 4.896574993960136e-05, + "loss": 1.4471, + "step": 620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893235939483587e-05, + "loss": 1.453, + "step": 630 + }, + { + "epoch": 0.19, + "learning_rate": 4.8898450128474626e-05, + "loss": 1.4696, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 4.886402287548357e-05, + "loss": 1.4526, + "step": 650 + }, + { + "epoch": 0.2, + "learning_rate": 4.8829078382055725e-05, + "loss": 1.4429, + "step": 660 + }, + { + "epoch": 0.2, + "learning_rate": 4.8793617405595025e-05, + "loss": 1.4491, + "step": 670 + }, + { + "epoch": 0.2, + "learning_rate": 4.8757640714699924e-05, + "loss": 1.4411, + "step": 680 + }, + { + "epoch": 0.2, + "learning_rate": 4.872114908914671e-05, + "loss": 1.4543, + "step": 690 + }, + { + "epoch": 0.21, + "learning_rate": 4.8684143319872636e-05, + "loss": 1.4556, + "step": 700 + }, + { + "epoch": 0.21, + "learning_rate": 4.864662420895873e-05, + "loss": 1.4506, + "step": 710 + }, + { + "epoch": 0.21, + "learning_rate": 4.860859256961244e-05, + "loss": 1.4671, + "step": 720 + }, + { + "epoch": 0.22, + "learning_rate": 4.857004922615002e-05, + "loss": 1.4469, + "step": 730 + }, + { + "epoch": 0.22, + "learning_rate": 4.8530995013978645e-05, + "loss": 1.4554, + "step": 740 + }, + { + "epoch": 0.22, + "learning_rate": 4.84914307795783e-05, + "loss": 1.4671, + "step": 750 + }, + { + "epoch": 0.23, + "learning_rate": 4.845135738048343e-05, + "loss": 1.445, + "step": 760 + }, + { + "epoch": 0.23, + "learning_rate": 4.841077568526439e-05, + "loss": 1.4469, + "step": 770 + }, + { + "epoch": 0.23, + "learning_rate": 4.836968657350857e-05, + "loss": 1.4677, + "step": 780 + }, + { + "epoch": 0.23, + "learning_rate": 4.832809093580135e-05, + "loss": 1.4653, + "step": 790 + }, + { + "epoch": 0.24, + "learning_rate": 4.8285989673706826e-05, + "loss": 1.4342, + "step": 800 + }, + { + "epoch": 0.24, + "learning_rate": 4.824338369974822e-05, + "loss": 1.458, + "step": 810 + }, + { + "epoch": 0.24, + "learning_rate": 4.8200273937388126e-05, + "loss": 1.4541, + "step": 820 + }, + { + "epoch": 0.25, + "learning_rate": 4.81566613210085e-05, + "loss": 1.4324, + "step": 830 + }, + { + "epoch": 0.25, + "learning_rate": 4.81125467958904e-05, + "loss": 1.4405, + "step": 840 + }, + { + "epoch": 0.25, + "learning_rate": 4.80679313181935e-05, + "loss": 1.4408, + "step": 850 + }, + { + "epoch": 0.25, + "learning_rate": 4.8022815854935356e-05, + "loss": 1.4395, + "step": 860 + }, + { + "epoch": 0.26, + "learning_rate": 4.797720138397045e-05, + "loss": 1.4359, + "step": 870 + }, + { + "epoch": 0.26, + "learning_rate": 4.793108889396902e-05, + "loss": 1.442, + "step": 880 + }, + { + "epoch": 0.26, + "learning_rate": 4.7884479384395594e-05, + "loss": 1.4566, + "step": 890 + }, + { + "epoch": 0.27, + "learning_rate": 4.7837373865487345e-05, + "loss": 1.4257, + "step": 900 + }, + { + "epoch": 0.27, + "learning_rate": 4.77897733582322e-05, + "loss": 1.4755, + "step": 910 + }, + { + "epoch": 0.27, + "learning_rate": 4.774167889434671e-05, + "loss": 1.4476, + "step": 920 + }, + { + "epoch": 0.28, + "learning_rate": 4.769309151625366e-05, + "loss": 1.4531, + "step": 930 + }, + { + "epoch": 0.28, + "learning_rate": 4.7644012277059516e-05, + "loss": 1.447, + "step": 940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7594442240531574e-05, + "loss": 1.4201, + "step": 950 + }, + { + "epoch": 0.28, + "learning_rate": 4.754438248107491e-05, + "loss": 1.4323, + "step": 960 + }, + { + "epoch": 0.29, + "learning_rate": 4.7493834083709104e-05, + "loss": 1.4432, + "step": 970 + }, + { + "epoch": 0.29, + "learning_rate": 4.7442798144044695e-05, + "loss": 1.4339, + "step": 980 + }, + { + "epoch": 0.29, + "learning_rate": 4.739127576825945e-05, + "loss": 1.4477, + "step": 990 + }, + { + "epoch": 0.3, + "learning_rate": 4.733926807307441e-05, + "loss": 1.4242, + "step": 1000 + }, + { + "epoch": 0.3, + "learning_rate": 4.728677618572965e-05, + "loss": 1.4341, + "step": 1010 + }, + { + "epoch": 0.3, + "learning_rate": 4.723380124395985e-05, + "loss": 1.4526, + "step": 1020 + }, + { + "epoch": 0.31, + "learning_rate": 4.7180344395969675e-05, + "loss": 1.4402, + "step": 1030 + }, + { + "epoch": 0.31, + "learning_rate": 4.712640680040884e-05, + "loss": 1.4257, + "step": 1040 + }, + { + "epoch": 0.31, + "learning_rate": 4.707198962634701e-05, + "loss": 1.4232, + "step": 1050 + }, + { + "epoch": 0.31, + "learning_rate": 4.70170940532485e-05, + "loss": 1.4485, + "step": 1060 + }, + { + "epoch": 0.32, + "learning_rate": 4.6961721270946635e-05, + "loss": 1.456, + "step": 1070 + }, + { + "epoch": 0.32, + "learning_rate": 4.690587247961804e-05, + "loss": 1.4555, + "step": 1080 + }, + { + "epoch": 0.32, + "learning_rate": 4.684954888975657e-05, + "loss": 1.4376, + "step": 1090 + }, + { + "epoch": 0.33, + "learning_rate": 4.6792751722147104e-05, + "loss": 1.4353, + "step": 1100 + }, + { + "epoch": 0.33, + "learning_rate": 4.6735482207839074e-05, + "loss": 1.4226, + "step": 1110 + }, + { + "epoch": 0.33, + "learning_rate": 4.6677741588119784e-05, + "loss": 1.4315, + "step": 1120 + }, + { + "epoch": 0.33, + "learning_rate": 4.66195311144875e-05, + "loss": 1.4303, + "step": 1130 + }, + { + "epoch": 0.34, + "learning_rate": 4.6560852048624345e-05, + "loss": 1.4288, + "step": 1140 + }, + { + "epoch": 0.34, + "learning_rate": 4.650170566236892e-05, + "loss": 1.4539, + "step": 1150 + }, + { + "epoch": 0.34, + "learning_rate": 4.6442093237688756e-05, + "loss": 1.4527, + "step": 1160 + }, + { + "epoch": 0.35, + "learning_rate": 4.6382016066652556e-05, + "loss": 1.4406, + "step": 1170 + }, + { + "epoch": 0.35, + "learning_rate": 4.632147545140212e-05, + "loss": 1.4233, + "step": 1180 + }, + { + "epoch": 0.35, + "learning_rate": 4.626047270412419e-05, + "loss": 1.426, + "step": 1190 + }, + { + "epoch": 0.36, + "learning_rate": 4.619900914702198e-05, + "loss": 1.4577, + "step": 1200 + }, + { + "epoch": 0.36, + "learning_rate": 4.613708611228652e-05, + "loss": 1.4313, + "step": 1210 + }, + { + "epoch": 0.36, + "learning_rate": 4.607470494206776e-05, + "loss": 1.4129, + "step": 1220 + }, + { + "epoch": 0.36, + "learning_rate": 4.601186698844554e-05, + "loss": 1.4368, + "step": 1230 + }, + { + "epoch": 0.37, + "learning_rate": 4.594857361340021e-05, + "loss": 1.4342, + "step": 1240 + }, + { + "epoch": 0.37, + "learning_rate": 4.588482618878316e-05, + "loss": 1.4438, + "step": 1250 + }, + { + "epoch": 0.37, + "learning_rate": 4.582062609628709e-05, + "loss": 1.4263, + "step": 1260 + }, + { + "epoch": 0.38, + "learning_rate": 4.575597472741601e-05, + "loss": 1.4379, + "step": 1270 + }, + { + "epoch": 0.38, + "learning_rate": 4.569087348345512e-05, + "loss": 1.4221, + "step": 1280 + }, + { + "epoch": 0.38, + "learning_rate": 4.562532377544046e-05, + "loss": 1.4414, + "step": 1290 + }, + { + "epoch": 0.39, + "learning_rate": 4.5559327024128265e-05, + "loss": 1.4395, + "step": 1300 + }, + { + "epoch": 0.39, + "learning_rate": 4.549288465996421e-05, + "loss": 1.4278, + "step": 1310 + }, + { + "epoch": 0.39, + "learning_rate": 4.542599812305243e-05, + "loss": 1.4344, + "step": 1320 + }, + { + "epoch": 0.39, + "learning_rate": 4.535866886312423e-05, + "loss": 1.4352, + "step": 1330 + }, + { + "epoch": 0.4, + "learning_rate": 4.529089833950675e-05, + "loss": 1.4133, + "step": 1340 + }, + { + "epoch": 0.4, + "learning_rate": 4.5222688021091266e-05, + "loss": 1.4506, + "step": 1350 + }, + { + "epoch": 0.4, + "learning_rate": 4.5154039386301385e-05, + "loss": 1.4295, + "step": 1360 + }, + { + "epoch": 0.41, + "learning_rate": 4.5084953923061016e-05, + "loss": 1.4389, + "step": 1370 + }, + { + "epoch": 0.41, + "learning_rate": 4.5015433128762065e-05, + "loss": 1.4247, + "step": 1380 + }, + { + "epoch": 0.41, + "learning_rate": 4.494547851023205e-05, + "loss": 1.4347, + "step": 1390 + }, + { + "epoch": 0.41, + "learning_rate": 4.487509158370139e-05, + "loss": 1.4133, + "step": 1400 + }, + { + "epoch": 0.42, + "learning_rate": 4.480427387477056e-05, + "loss": 1.4296, + "step": 1410 + }, + { + "epoch": 0.42, + "learning_rate": 4.473302691837702e-05, + "loss": 1.4353, + "step": 1420 + }, + { + "epoch": 0.42, + "learning_rate": 4.466135225876194e-05, + "loss": 1.4377, + "step": 1430 + }, + { + "epoch": 0.43, + "learning_rate": 4.458925144943676e-05, + "loss": 1.4168, + "step": 1440 + }, + { + "epoch": 0.43, + "learning_rate": 4.451672605314948e-05, + "loss": 1.4334, + "step": 1450 + }, + { + "epoch": 0.43, + "learning_rate": 4.444377764185082e-05, + "loss": 1.44, + "step": 1460 + }, + { + "epoch": 0.44, + "learning_rate": 4.43704077966601e-05, + "loss": 1.4375, + "step": 1470 + }, + { + "epoch": 0.44, + "learning_rate": 4.4296618107831036e-05, + "loss": 1.447, + "step": 1480 + }, + { + "epoch": 0.44, + "learning_rate": 4.422241017471722e-05, + "loss": 1.4151, + "step": 1490 + }, + { + "epoch": 0.44, + "learning_rate": 4.414778560573749e-05, + "loss": 1.4388, + "step": 1500 + }, + { + "epoch": 0.45, + "learning_rate": 4.4072746018341036e-05, + "loss": 1.4228, + "step": 1510 + }, + { + "epoch": 0.45, + "learning_rate": 4.399729303897238e-05, + "loss": 1.4104, + "step": 1520 + }, + { + "epoch": 0.45, + "learning_rate": 4.392142830303608e-05, + "loss": 1.4441, + "step": 1530 + }, + { + "epoch": 0.46, + "learning_rate": 4.384515345486131e-05, + "loss": 1.4282, + "step": 1540 + }, + { + "epoch": 0.46, + "learning_rate": 4.376847014766623e-05, + "loss": 1.4271, + "step": 1550 + }, + { + "epoch": 0.46, + "learning_rate": 4.369138004352212e-05, + "loss": 1.4223, + "step": 1560 + }, + { + "epoch": 0.47, + "learning_rate": 4.3613884813317406e-05, + "loss": 1.425, + "step": 1570 + }, + { + "epoch": 0.47, + "learning_rate": 4.3535986136721377e-05, + "loss": 1.4392, + "step": 1580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3457685702147834e-05, + "loss": 1.4097, + "step": 1590 + }, + { + "epoch": 0.47, + "learning_rate": 4.3378985206718484e-05, + "loss": 1.4405, + "step": 1600 + }, + { + "epoch": 0.48, + "learning_rate": 4.329988635622611e-05, + "loss": 1.4311, + "step": 1610 + }, + { + "epoch": 0.48, + "learning_rate": 4.322039086509769e-05, + "loss": 1.4358, + "step": 1620 + }, + { + "epoch": 0.48, + "learning_rate": 4.3140500456357145e-05, + "loss": 1.4114, + "step": 1630 + }, + { + "epoch": 0.49, + "learning_rate": 4.306021686158805e-05, + "loss": 1.4165, + "step": 1640 + }, + { + "epoch": 0.49, + "learning_rate": 4.297954182089609e-05, + "loss": 1.4309, + "step": 1650 + }, + { + "epoch": 0.49, + "learning_rate": 4.289847708287129e-05, + "loss": 1.4215, + "step": 1660 + }, + { + "epoch": 0.49, + "learning_rate": 4.2817024404550246e-05, + "loss": 1.4124, + "step": 1670 + }, + { + "epoch": 0.5, + "learning_rate": 4.2735185551377895e-05, + "loss": 1.4001, + "step": 1680 + }, + { + "epoch": 0.5, + "learning_rate": 4.265296229716935e-05, + "loss": 1.4302, + "step": 1690 + }, + { + "epoch": 0.5, + "learning_rate": 4.25703564240714e-05, + "loss": 1.4211, + "step": 1700 + }, + { + "epoch": 0.51, + "learning_rate": 4.2487369722523906e-05, + "loss": 1.4423, + "step": 1710 + }, + { + "epoch": 0.51, + "learning_rate": 4.240400399122101e-05, + "loss": 1.4299, + "step": 1720 + }, + { + "epoch": 0.51, + "learning_rate": 4.232026103707209e-05, + "loss": 1.4214, + "step": 1730 + }, + { + "epoch": 0.52, + "learning_rate": 4.223614267516268e-05, + "loss": 1.4348, + "step": 1740 + }, + { + "epoch": 0.52, + "learning_rate": 4.215165072871505e-05, + "loss": 1.4315, + "step": 1750 + }, + { + "epoch": 0.52, + "learning_rate": 4.206678702904874e-05, + "loss": 1.4098, + "step": 1760 + }, + { + "epoch": 0.52, + "learning_rate": 4.198155341554084e-05, + "loss": 1.4242, + "step": 1770 + }, + { + "epoch": 0.53, + "learning_rate": 4.1895951735586145e-05, + "loss": 1.4272, + "step": 1780 + }, + { + "epoch": 0.53, + "learning_rate": 4.1809983844557085e-05, + "loss": 1.4452, + "step": 1790 + }, + { + "epoch": 0.53, + "learning_rate": 4.172365160576355e-05, + "loss": 1.431, + "step": 1800 + }, + { + "epoch": 0.54, + "learning_rate": 4.163695689041245e-05, + "loss": 1.4389, + "step": 1810 + }, + { + "epoch": 0.54, + "learning_rate": 4.154990157756722e-05, + "loss": 1.413, + "step": 1820 + }, + { + "epoch": 0.54, + "learning_rate": 4.1462487554107036e-05, + "loss": 1.3893, + "step": 1830 + }, + { + "epoch": 0.55, + "learning_rate": 4.137471671468596e-05, + "loss": 1.4052, + "step": 1840 + }, + { + "epoch": 0.55, + "learning_rate": 4.128659096169183e-05, + "loss": 1.4173, + "step": 1850 + }, + { + "epoch": 0.55, + "learning_rate": 4.1198112205205096e-05, + "loss": 1.4012, + "step": 1860 + }, + { + "epoch": 0.55, + "learning_rate": 4.110928236295734e-05, + "loss": 1.4119, + "step": 1870 + }, + { + "epoch": 0.56, + "learning_rate": 4.102010336028975e-05, + "loss": 1.4111, + "step": 1880 + }, + { + "epoch": 0.56, + "learning_rate": 4.0930577130111424e-05, + "loss": 1.4156, + "step": 1890 + }, + { + "epoch": 0.56, + "learning_rate": 4.084070561285739e-05, + "loss": 1.4419, + "step": 1900 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750490756446624e-05, + "loss": 1.4121, + "step": 1910 + }, + { + "epoch": 0.57, + "learning_rate": 4.0659934516239795e-05, + "loss": 1.4204, + "step": 1920 + }, + { + "epoch": 0.57, + "learning_rate": 4.056903885499689e-05, + "loss": 1.4032, + "step": 1930 + }, + { + "epoch": 0.57, + "learning_rate": 4.047780574283466e-05, + "loss": 1.4207, + "step": 1940 + }, + { + "epoch": 0.58, + "learning_rate": 4.038623715718397e-05, + "loss": 1.4095, + "step": 1950 + }, + { + "epoch": 0.58, + "learning_rate": 4.029433508274686e-05, + "loss": 1.4228, + "step": 1960 + }, + { + "epoch": 0.58, + "learning_rate": 4.0202101511453586e-05, + "loss": 1.4141, + "step": 1970 + }, + { + "epoch": 0.59, + "learning_rate": 4.010953844241943e-05, + "loss": 1.4323, + "step": 1980 + }, + { + "epoch": 0.59, + "learning_rate": 4.001664788190135e-05, + "loss": 1.4087, + "step": 1990 + }, + { + "epoch": 0.59, + "learning_rate": 3.992343184325453e-05, + "loss": 1.4186, + "step": 2000 + } + ], + "max_steps": 6748, + "num_train_epochs": 2, + "total_flos": 3.456686675214729e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..406ab5b628f223bfcd63d70185fb1bc0973e19c4 --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77681af64e1f04ae2b28b063de632629c209cd2338ce2449c3e014f309b6088a +size 3298 diff --git a/checkpoint-3000/README.md b/checkpoint-3000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-3000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-3000/adapter_config.json b/checkpoint-3000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a092508f49b7debf23eb22091bdac4ac1daa62a9 --- /dev/null +++ b/checkpoint-3000/adapter_config.json @@ -0,0 +1,20 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "../Baichuan-13B-Chat", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "W_pack" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3000/adapter_model.bin b/checkpoint-3000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..68819aa215e61f1848b8b2c47adaf05ae15a61c4 --- /dev/null +++ b/checkpoint-3000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d481cef78cdb2a1acbb9fd435f4f643daedbc037e87b90265da29b9ec36fe47 +size 26241825 diff --git a/checkpoint-3000/finetuning_args.json b/checkpoint-3000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..fafc24bcd05e0bda0201b5a7198b067dab53f435 --- /dev/null +++ b/checkpoint-3000/finetuning_args.json @@ -0,0 +1,12 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "W_pack" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..30df11439750e35c8a98d188568896dce766f0aa --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1cf6d930476d506151b061998257dfd41ad2dd1901be12079800a96c7402f63 +size 52496005 diff --git a/checkpoint-3000/rng_state_0.pth b/checkpoint-3000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..d2afac61aa7ee32034e3ae332032b85decc50326 --- /dev/null +++ b/checkpoint-3000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5247df7cfcf2d409e772139be8982ae4681cb33dfc6126b038b740cd35c960f0 +size 18679 diff --git a/checkpoint-3000/rng_state_1.pth b/checkpoint-3000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..dd08f7ed6a77ceb710635d9154413ac29931dc82 --- /dev/null +++ b/checkpoint-3000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3cf47cf233134812f41e2efe36ce7b26dd09819eedd46f5c747c9b113456122b +size 18679 diff --git a/checkpoint-3000/rng_state_2.pth b/checkpoint-3000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a4018a3ab4579616906ee8106d912765a9ac1f68 --- /dev/null +++ b/checkpoint-3000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0daa7182561b5487a794d217b128815a43e9db9f96f31158e89127b3a1ec9faa +size 18679 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2e7d42e7cfdbbf1834635149d7a2bbc208b9b24c --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9f444ecf2b026bf1093463419a32b0ce285fa263ce3b4908065fb1286c5852f6 +size 627 diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8c32e688f380d13d2dad8ece9e318c06e05f6f63 --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,1816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8889547373879547, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999729068921297e-05, + "loss": 1.8898, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998916281557476e-05, + "loss": 1.7273, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999756165552527e-05, + "loss": 1.6799, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566522018553e-05, + "loss": 1.6431, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322701664249e-05, + "loss": 1.6153, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990247097742984e-05, + "loss": 1.5933, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.9986725528075205e-05, + "loss": 1.5913, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.998266238396737e-05, + "loss": 1.5434, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 4.997805775348605e-05, + "loss": 1.5304, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 4.997291173643424e-05, + "loss": 1.5531, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 4.996722444434921e-05, + "loss": 1.5446, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 4.99609960005001e-05, + "loss": 1.5352, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 4.995422653988524e-05, + "loss": 1.5303, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 4.994691620922919e-05, + "loss": 1.5449, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 4.993906516697964e-05, + "loss": 1.5114, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9930673583303865e-05, + "loss": 1.5043, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 4.992174164008515e-05, + "loss": 1.5476, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 4.991226953091877e-05, + "loss": 1.5107, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 4.9902257461107824e-05, + "loss": 1.5104, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 4.9891705647658795e-05, + "loss": 1.5298, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 4.988061431927681e-05, + "loss": 1.4907, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 4.986898371636071e-05, + "loss": 1.5127, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 4.985681409099784e-05, + "loss": 1.5037, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 4.984410570695858e-05, + "loss": 1.5029, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 4.983085883969063e-05, + "loss": 1.4725, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 4.981707377631303e-05, + "loss": 1.5148, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 4.9802750815609936e-05, + "loss": 1.4993, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 4.978789026802419e-05, + "loss": 1.5006, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 4.9772492455650494e-05, + "loss": 1.4885, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 4.975655771222855e-05, + "loss": 1.4898, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 4.9740086383135706e-05, + "loss": 1.4906, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 4.97230788253796e-05, + "loss": 1.4796, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 4.970553540759028e-05, + "loss": 1.4861, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 4.968745651001231e-05, + "loss": 1.4827, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9668842524496526e-05, + "loss": 1.4884, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 4.964969385449149e-05, + "loss": 1.4873, + "step": 360 + }, + { + "epoch": 0.11, + "learning_rate": 4.96300109150348e-05, + "loss": 1.4848, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 4.960979413274404e-05, + "loss": 1.4881, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 4.9589043945807594e-05, + "loss": 1.4618, + "step": 390 + }, + { + "epoch": 0.12, + "learning_rate": 4.9567760803975105e-05, + "loss": 1.4858, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 4.954594516854773e-05, + "loss": 1.4777, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 4.952359751236817e-05, + "loss": 1.4828, + "step": 420 + }, + { + "epoch": 0.13, + "learning_rate": 4.950071831981038e-05, + "loss": 1.4571, + "step": 430 + }, + { + "epoch": 0.13, + "learning_rate": 4.9477308086769117e-05, + "loss": 1.4724, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 4.945336732064915e-05, + "loss": 1.4771, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 4.9428896540354294e-05, + "loss": 1.4604, + "step": 460 + }, + { + "epoch": 0.14, + "learning_rate": 4.940389627627613e-05, + "loss": 1.4815, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 4.937836707028255e-05, + "loss": 1.4859, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 4.935230947570597e-05, + "loss": 1.4715, + "step": 490 + }, + { + "epoch": 0.15, + "learning_rate": 4.932572405733137e-05, + "loss": 1.4759, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 4.929861139138404e-05, + "loss": 1.4678, + "step": 510 + }, + { + "epoch": 0.15, + "learning_rate": 4.9270972065517083e-05, + "loss": 1.4754, + "step": 520 + }, + { + "epoch": 0.16, + "learning_rate": 4.924280667879869e-05, + "loss": 1.462, + "step": 530 + }, + { + "epoch": 0.16, + "learning_rate": 4.921411584169915e-05, + "loss": 1.4704, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 4.918490017607761e-05, + "loss": 1.4661, + "step": 550 + }, + { + "epoch": 0.17, + "learning_rate": 4.915516031516863e-05, + "loss": 1.471, + "step": 560 + }, + { + "epoch": 0.17, + "learning_rate": 4.912489690356841e-05, + "loss": 1.451, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 4.909411059722084e-05, + "loss": 1.4411, + "step": 580 + }, + { + "epoch": 0.17, + "learning_rate": 4.9062802063403316e-05, + "loss": 1.456, + "step": 590 + }, + { + "epoch": 0.18, + "learning_rate": 4.90309719807122e-05, + "loss": 1.4678, + "step": 600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8998621039048205e-05, + "loss": 1.479, + "step": 610 + }, + { + "epoch": 0.18, + "learning_rate": 4.896574993960136e-05, + "loss": 1.4471, + "step": 620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893235939483587e-05, + "loss": 1.453, + "step": 630 + }, + { + "epoch": 0.19, + "learning_rate": 4.8898450128474626e-05, + "loss": 1.4696, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 4.886402287548357e-05, + "loss": 1.4526, + "step": 650 + }, + { + "epoch": 0.2, + "learning_rate": 4.8829078382055725e-05, + "loss": 1.4429, + "step": 660 + }, + { + "epoch": 0.2, + "learning_rate": 4.8793617405595025e-05, + "loss": 1.4491, + "step": 670 + }, + { + "epoch": 0.2, + "learning_rate": 4.8757640714699924e-05, + "loss": 1.4411, + "step": 680 + }, + { + "epoch": 0.2, + "learning_rate": 4.872114908914671e-05, + "loss": 1.4543, + "step": 690 + }, + { + "epoch": 0.21, + "learning_rate": 4.8684143319872636e-05, + "loss": 1.4556, + "step": 700 + }, + { + "epoch": 0.21, + "learning_rate": 4.864662420895873e-05, + "loss": 1.4506, + "step": 710 + }, + { + "epoch": 0.21, + "learning_rate": 4.860859256961244e-05, + "loss": 1.4671, + "step": 720 + }, + { + "epoch": 0.22, + "learning_rate": 4.857004922615002e-05, + "loss": 1.4469, + "step": 730 + }, + { + "epoch": 0.22, + "learning_rate": 4.8530995013978645e-05, + "loss": 1.4554, + "step": 740 + }, + { + "epoch": 0.22, + "learning_rate": 4.84914307795783e-05, + "loss": 1.4671, + "step": 750 + }, + { + "epoch": 0.23, + "learning_rate": 4.845135738048343e-05, + "loss": 1.445, + "step": 760 + }, + { + "epoch": 0.23, + "learning_rate": 4.841077568526439e-05, + "loss": 1.4469, + "step": 770 + }, + { + "epoch": 0.23, + "learning_rate": 4.836968657350857e-05, + "loss": 1.4677, + "step": 780 + }, + { + "epoch": 0.23, + "learning_rate": 4.832809093580135e-05, + "loss": 1.4653, + "step": 790 + }, + { + "epoch": 0.24, + "learning_rate": 4.8285989673706826e-05, + "loss": 1.4342, + "step": 800 + }, + { + "epoch": 0.24, + "learning_rate": 4.824338369974822e-05, + "loss": 1.458, + "step": 810 + }, + { + "epoch": 0.24, + "learning_rate": 4.8200273937388126e-05, + "loss": 1.4541, + "step": 820 + }, + { + "epoch": 0.25, + "learning_rate": 4.81566613210085e-05, + "loss": 1.4324, + "step": 830 + }, + { + "epoch": 0.25, + "learning_rate": 4.81125467958904e-05, + "loss": 1.4405, + "step": 840 + }, + { + "epoch": 0.25, + "learning_rate": 4.80679313181935e-05, + "loss": 1.4408, + "step": 850 + }, + { + "epoch": 0.25, + "learning_rate": 4.8022815854935356e-05, + "loss": 1.4395, + "step": 860 + }, + { + "epoch": 0.26, + "learning_rate": 4.797720138397045e-05, + "loss": 1.4359, + "step": 870 + }, + { + "epoch": 0.26, + "learning_rate": 4.793108889396902e-05, + "loss": 1.442, + "step": 880 + }, + { + "epoch": 0.26, + "learning_rate": 4.7884479384395594e-05, + "loss": 1.4566, + "step": 890 + }, + { + "epoch": 0.27, + "learning_rate": 4.7837373865487345e-05, + "loss": 1.4257, + "step": 900 + }, + { + "epoch": 0.27, + "learning_rate": 4.77897733582322e-05, + "loss": 1.4755, + "step": 910 + }, + { + "epoch": 0.27, + "learning_rate": 4.774167889434671e-05, + "loss": 1.4476, + "step": 920 + }, + { + "epoch": 0.28, + "learning_rate": 4.769309151625366e-05, + "loss": 1.4531, + "step": 930 + }, + { + "epoch": 0.28, + "learning_rate": 4.7644012277059516e-05, + "loss": 1.447, + "step": 940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7594442240531574e-05, + "loss": 1.4201, + "step": 950 + }, + { + "epoch": 0.28, + "learning_rate": 4.754438248107491e-05, + "loss": 1.4323, + "step": 960 + }, + { + "epoch": 0.29, + "learning_rate": 4.7493834083709104e-05, + "loss": 1.4432, + "step": 970 + }, + { + "epoch": 0.29, + "learning_rate": 4.7442798144044695e-05, + "loss": 1.4339, + "step": 980 + }, + { + "epoch": 0.29, + "learning_rate": 4.739127576825945e-05, + "loss": 1.4477, + "step": 990 + }, + { + "epoch": 0.3, + "learning_rate": 4.733926807307441e-05, + "loss": 1.4242, + "step": 1000 + }, + { + "epoch": 0.3, + "learning_rate": 4.728677618572965e-05, + "loss": 1.4341, + "step": 1010 + }, + { + "epoch": 0.3, + "learning_rate": 4.723380124395985e-05, + "loss": 1.4526, + "step": 1020 + }, + { + "epoch": 0.31, + "learning_rate": 4.7180344395969675e-05, + "loss": 1.4402, + "step": 1030 + }, + { + "epoch": 0.31, + "learning_rate": 4.712640680040884e-05, + "loss": 1.4257, + "step": 1040 + }, + { + "epoch": 0.31, + "learning_rate": 4.707198962634701e-05, + "loss": 1.4232, + "step": 1050 + }, + { + "epoch": 0.31, + "learning_rate": 4.70170940532485e-05, + "loss": 1.4485, + "step": 1060 + }, + { + "epoch": 0.32, + "learning_rate": 4.6961721270946635e-05, + "loss": 1.456, + "step": 1070 + }, + { + "epoch": 0.32, + "learning_rate": 4.690587247961804e-05, + "loss": 1.4555, + "step": 1080 + }, + { + "epoch": 0.32, + "learning_rate": 4.684954888975657e-05, + "loss": 1.4376, + "step": 1090 + }, + { + "epoch": 0.33, + "learning_rate": 4.6792751722147104e-05, + "loss": 1.4353, + "step": 1100 + }, + { + "epoch": 0.33, + "learning_rate": 4.6735482207839074e-05, + "loss": 1.4226, + "step": 1110 + }, + { + "epoch": 0.33, + "learning_rate": 4.6677741588119784e-05, + "loss": 1.4315, + "step": 1120 + }, + { + "epoch": 0.33, + "learning_rate": 4.66195311144875e-05, + "loss": 1.4303, + "step": 1130 + }, + { + "epoch": 0.34, + "learning_rate": 4.6560852048624345e-05, + "loss": 1.4288, + "step": 1140 + }, + { + "epoch": 0.34, + "learning_rate": 4.650170566236892e-05, + "loss": 1.4539, + "step": 1150 + }, + { + "epoch": 0.34, + "learning_rate": 4.6442093237688756e-05, + "loss": 1.4527, + "step": 1160 + }, + { + "epoch": 0.35, + "learning_rate": 4.6382016066652556e-05, + "loss": 1.4406, + "step": 1170 + }, + { + "epoch": 0.35, + "learning_rate": 4.632147545140212e-05, + "loss": 1.4233, + "step": 1180 + }, + { + "epoch": 0.35, + "learning_rate": 4.626047270412419e-05, + "loss": 1.426, + "step": 1190 + }, + { + "epoch": 0.36, + "learning_rate": 4.619900914702198e-05, + "loss": 1.4577, + "step": 1200 + }, + { + "epoch": 0.36, + "learning_rate": 4.613708611228652e-05, + "loss": 1.4313, + "step": 1210 + }, + { + "epoch": 0.36, + "learning_rate": 4.607470494206776e-05, + "loss": 1.4129, + "step": 1220 + }, + { + "epoch": 0.36, + "learning_rate": 4.601186698844554e-05, + "loss": 1.4368, + "step": 1230 + }, + { + "epoch": 0.37, + "learning_rate": 4.594857361340021e-05, + "loss": 1.4342, + "step": 1240 + }, + { + "epoch": 0.37, + "learning_rate": 4.588482618878316e-05, + "loss": 1.4438, + "step": 1250 + }, + { + "epoch": 0.37, + "learning_rate": 4.582062609628709e-05, + "loss": 1.4263, + "step": 1260 + }, + { + "epoch": 0.38, + "learning_rate": 4.575597472741601e-05, + "loss": 1.4379, + "step": 1270 + }, + { + "epoch": 0.38, + "learning_rate": 4.569087348345512e-05, + "loss": 1.4221, + "step": 1280 + }, + { + "epoch": 0.38, + "learning_rate": 4.562532377544046e-05, + "loss": 1.4414, + "step": 1290 + }, + { + "epoch": 0.39, + "learning_rate": 4.5559327024128265e-05, + "loss": 1.4395, + "step": 1300 + }, + { + "epoch": 0.39, + "learning_rate": 4.549288465996421e-05, + "loss": 1.4278, + "step": 1310 + }, + { + "epoch": 0.39, + "learning_rate": 4.542599812305243e-05, + "loss": 1.4344, + "step": 1320 + }, + { + "epoch": 0.39, + "learning_rate": 4.535866886312423e-05, + "loss": 1.4352, + "step": 1330 + }, + { + "epoch": 0.4, + "learning_rate": 4.529089833950675e-05, + "loss": 1.4133, + "step": 1340 + }, + { + "epoch": 0.4, + "learning_rate": 4.5222688021091266e-05, + "loss": 1.4506, + "step": 1350 + }, + { + "epoch": 0.4, + "learning_rate": 4.5154039386301385e-05, + "loss": 1.4295, + "step": 1360 + }, + { + "epoch": 0.41, + "learning_rate": 4.5084953923061016e-05, + "loss": 1.4389, + "step": 1370 + }, + { + "epoch": 0.41, + "learning_rate": 4.5015433128762065e-05, + "loss": 1.4247, + "step": 1380 + }, + { + "epoch": 0.41, + "learning_rate": 4.494547851023205e-05, + "loss": 1.4347, + "step": 1390 + }, + { + "epoch": 0.41, + "learning_rate": 4.487509158370139e-05, + "loss": 1.4133, + "step": 1400 + }, + { + "epoch": 0.42, + "learning_rate": 4.480427387477056e-05, + "loss": 1.4296, + "step": 1410 + }, + { + "epoch": 0.42, + "learning_rate": 4.473302691837702e-05, + "loss": 1.4353, + "step": 1420 + }, + { + "epoch": 0.42, + "learning_rate": 4.466135225876194e-05, + "loss": 1.4377, + "step": 1430 + }, + { + "epoch": 0.43, + "learning_rate": 4.458925144943676e-05, + "loss": 1.4168, + "step": 1440 + }, + { + "epoch": 0.43, + "learning_rate": 4.451672605314948e-05, + "loss": 1.4334, + "step": 1450 + }, + { + "epoch": 0.43, + "learning_rate": 4.444377764185082e-05, + "loss": 1.44, + "step": 1460 + }, + { + "epoch": 0.44, + "learning_rate": 4.43704077966601e-05, + "loss": 1.4375, + "step": 1470 + }, + { + "epoch": 0.44, + "learning_rate": 4.4296618107831036e-05, + "loss": 1.447, + "step": 1480 + }, + { + "epoch": 0.44, + "learning_rate": 4.422241017471722e-05, + "loss": 1.4151, + "step": 1490 + }, + { + "epoch": 0.44, + "learning_rate": 4.414778560573749e-05, + "loss": 1.4388, + "step": 1500 + }, + { + "epoch": 0.45, + "learning_rate": 4.4072746018341036e-05, + "loss": 1.4228, + "step": 1510 + }, + { + "epoch": 0.45, + "learning_rate": 4.399729303897238e-05, + "loss": 1.4104, + "step": 1520 + }, + { + "epoch": 0.45, + "learning_rate": 4.392142830303608e-05, + "loss": 1.4441, + "step": 1530 + }, + { + "epoch": 0.46, + "learning_rate": 4.384515345486131e-05, + "loss": 1.4282, + "step": 1540 + }, + { + "epoch": 0.46, + "learning_rate": 4.376847014766623e-05, + "loss": 1.4271, + "step": 1550 + }, + { + "epoch": 0.46, + "learning_rate": 4.369138004352212e-05, + "loss": 1.4223, + "step": 1560 + }, + { + "epoch": 0.47, + "learning_rate": 4.3613884813317406e-05, + "loss": 1.425, + "step": 1570 + }, + { + "epoch": 0.47, + "learning_rate": 4.3535986136721377e-05, + "loss": 1.4392, + "step": 1580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3457685702147834e-05, + "loss": 1.4097, + "step": 1590 + }, + { + "epoch": 0.47, + "learning_rate": 4.3378985206718484e-05, + "loss": 1.4405, + "step": 1600 + }, + { + "epoch": 0.48, + "learning_rate": 4.329988635622611e-05, + "loss": 1.4311, + "step": 1610 + }, + { + "epoch": 0.48, + "learning_rate": 4.322039086509769e-05, + "loss": 1.4358, + "step": 1620 + }, + { + "epoch": 0.48, + "learning_rate": 4.3140500456357145e-05, + "loss": 1.4114, + "step": 1630 + }, + { + "epoch": 0.49, + "learning_rate": 4.306021686158805e-05, + "loss": 1.4165, + "step": 1640 + }, + { + "epoch": 0.49, + "learning_rate": 4.297954182089609e-05, + "loss": 1.4309, + "step": 1650 + }, + { + "epoch": 0.49, + "learning_rate": 4.289847708287129e-05, + "loss": 1.4215, + "step": 1660 + }, + { + "epoch": 0.49, + "learning_rate": 4.2817024404550246e-05, + "loss": 1.4124, + "step": 1670 + }, + { + "epoch": 0.5, + "learning_rate": 4.2735185551377895e-05, + "loss": 1.4001, + "step": 1680 + }, + { + "epoch": 0.5, + "learning_rate": 4.265296229716935e-05, + "loss": 1.4302, + "step": 1690 + }, + { + "epoch": 0.5, + "learning_rate": 4.25703564240714e-05, + "loss": 1.4211, + "step": 1700 + }, + { + "epoch": 0.51, + "learning_rate": 4.2487369722523906e-05, + "loss": 1.4423, + "step": 1710 + }, + { + "epoch": 0.51, + "learning_rate": 4.240400399122101e-05, + "loss": 1.4299, + "step": 1720 + }, + { + "epoch": 0.51, + "learning_rate": 4.232026103707209e-05, + "loss": 1.4214, + "step": 1730 + }, + { + "epoch": 0.52, + "learning_rate": 4.223614267516268e-05, + "loss": 1.4348, + "step": 1740 + }, + { + "epoch": 0.52, + "learning_rate": 4.215165072871505e-05, + "loss": 1.4315, + "step": 1750 + }, + { + "epoch": 0.52, + "learning_rate": 4.206678702904874e-05, + "loss": 1.4098, + "step": 1760 + }, + { + "epoch": 0.52, + "learning_rate": 4.198155341554084e-05, + "loss": 1.4242, + "step": 1770 + }, + { + "epoch": 0.53, + "learning_rate": 4.1895951735586145e-05, + "loss": 1.4272, + "step": 1780 + }, + { + "epoch": 0.53, + "learning_rate": 4.1809983844557085e-05, + "loss": 1.4452, + "step": 1790 + }, + { + "epoch": 0.53, + "learning_rate": 4.172365160576355e-05, + "loss": 1.431, + "step": 1800 + }, + { + "epoch": 0.54, + "learning_rate": 4.163695689041245e-05, + "loss": 1.4389, + "step": 1810 + }, + { + "epoch": 0.54, + "learning_rate": 4.154990157756722e-05, + "loss": 1.413, + "step": 1820 + }, + { + "epoch": 0.54, + "learning_rate": 4.1462487554107036e-05, + "loss": 1.3893, + "step": 1830 + }, + { + "epoch": 0.55, + "learning_rate": 4.137471671468596e-05, + "loss": 1.4052, + "step": 1840 + }, + { + "epoch": 0.55, + "learning_rate": 4.128659096169183e-05, + "loss": 1.4173, + "step": 1850 + }, + { + "epoch": 0.55, + "learning_rate": 4.1198112205205096e-05, + "loss": 1.4012, + "step": 1860 + }, + { + "epoch": 0.55, + "learning_rate": 4.110928236295734e-05, + "loss": 1.4119, + "step": 1870 + }, + { + "epoch": 0.56, + "learning_rate": 4.102010336028975e-05, + "loss": 1.4111, + "step": 1880 + }, + { + "epoch": 0.56, + "learning_rate": 4.0930577130111424e-05, + "loss": 1.4156, + "step": 1890 + }, + { + "epoch": 0.56, + "learning_rate": 4.084070561285739e-05, + "loss": 1.4419, + "step": 1900 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750490756446624e-05, + "loss": 1.4121, + "step": 1910 + }, + { + "epoch": 0.57, + "learning_rate": 4.0659934516239795e-05, + "loss": 1.4204, + "step": 1920 + }, + { + "epoch": 0.57, + "learning_rate": 4.056903885499689e-05, + "loss": 1.4032, + "step": 1930 + }, + { + "epoch": 0.57, + "learning_rate": 4.047780574283466e-05, + "loss": 1.4207, + "step": 1940 + }, + { + "epoch": 0.58, + "learning_rate": 4.038623715718397e-05, + "loss": 1.4095, + "step": 1950 + }, + { + "epoch": 0.58, + "learning_rate": 4.029433508274686e-05, + "loss": 1.4228, + "step": 1960 + }, + { + "epoch": 0.58, + "learning_rate": 4.0202101511453586e-05, + "loss": 1.4141, + "step": 1970 + }, + { + "epoch": 0.59, + "learning_rate": 4.010953844241943e-05, + "loss": 1.4323, + "step": 1980 + }, + { + "epoch": 0.59, + "learning_rate": 4.001664788190135e-05, + "loss": 1.4087, + "step": 1990 + }, + { + "epoch": 0.59, + "learning_rate": 3.992343184325453e-05, + "loss": 1.4186, + "step": 2000 + }, + { + "epoch": 0.6, + "learning_rate": 3.982989234688873e-05, + "loss": 1.4264, + "step": 2010 + }, + { + "epoch": 0.6, + "learning_rate": 3.973603142022448e-05, + "loss": 1.4417, + "step": 2020 + }, + { + "epoch": 0.6, + "learning_rate": 3.964185109764915e-05, + "loss": 1.4075, + "step": 2030 + }, + { + "epoch": 0.6, + "learning_rate": 3.954735342047285e-05, + "loss": 1.4143, + "step": 2040 + }, + { + "epoch": 0.61, + "learning_rate": 3.945254043688419e-05, + "loss": 1.4176, + "step": 2050 + }, + { + "epoch": 0.61, + "learning_rate": 3.935741420190587e-05, + "loss": 1.4214, + "step": 2060 + }, + { + "epoch": 0.61, + "learning_rate": 3.926197677735018e-05, + "loss": 1.4256, + "step": 2070 + }, + { + "epoch": 0.62, + "learning_rate": 3.9166230231774276e-05, + "loss": 1.4075, + "step": 2080 + }, + { + "epoch": 0.62, + "learning_rate": 3.9070176640435335e-05, + "loss": 1.3887, + "step": 2090 + }, + { + "epoch": 0.62, + "learning_rate": 3.897381808524562e-05, + "loss": 1.4225, + "step": 2100 + }, + { + "epoch": 0.63, + "learning_rate": 3.887715665472729e-05, + "loss": 1.4114, + "step": 2110 + }, + { + "epoch": 0.63, + "learning_rate": 3.8780194443967226e-05, + "loss": 1.4316, + "step": 2120 + }, + { + "epoch": 0.63, + "learning_rate": 3.8682933554571524e-05, + "loss": 1.4168, + "step": 2130 + }, + { + "epoch": 0.63, + "learning_rate": 3.858537609461999e-05, + "loss": 1.4237, + "step": 2140 + }, + { + "epoch": 0.64, + "learning_rate": 3.8487524178620464e-05, + "loss": 1.4373, + "step": 2150 + }, + { + "epoch": 0.64, + "learning_rate": 3.838937992746295e-05, + "loss": 1.4089, + "step": 2160 + }, + { + "epoch": 0.64, + "learning_rate": 3.8290945468373684e-05, + "loss": 1.4319, + "step": 2170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8192222934869e-05, + "loss": 1.4035, + "step": 2180 + }, + { + "epoch": 0.65, + "learning_rate": 3.809321446670909e-05, + "loss": 1.4161, + "step": 2190 + }, + { + "epoch": 0.65, + "learning_rate": 3.799392220985164e-05, + "loss": 1.4136, + "step": 2200 + }, + { + "epoch": 0.65, + "learning_rate": 3.789434831640533e-05, + "loss": 1.4188, + "step": 2210 + }, + { + "epoch": 0.66, + "learning_rate": 3.779449494458312e-05, + "loss": 1.4203, + "step": 2220 + }, + { + "epoch": 0.66, + "learning_rate": 3.769436425865557e-05, + "loss": 1.4263, + "step": 2230 + }, + { + "epoch": 0.66, + "learning_rate": 3.759395842890384e-05, + "loss": 1.4295, + "step": 2240 + }, + { + "epoch": 0.67, + "learning_rate": 3.749327963157274e-05, + "loss": 1.4144, + "step": 2250 + }, + { + "epoch": 0.67, + "learning_rate": 3.739233004882346e-05, + "loss": 1.4162, + "step": 2260 + }, + { + "epoch": 0.67, + "learning_rate": 3.729111186868635e-05, + "loss": 1.4099, + "step": 2270 + }, + { + "epoch": 0.68, + "learning_rate": 3.718962728501348e-05, + "loss": 1.3878, + "step": 2280 + }, + { + "epoch": 0.68, + "learning_rate": 3.708787849743106e-05, + "loss": 1.4399, + "step": 2290 + }, + { + "epoch": 0.68, + "learning_rate": 3.69858677112918e-05, + "loss": 1.4249, + "step": 2300 + }, + { + "epoch": 0.68, + "learning_rate": 3.688359713762707e-05, + "loss": 1.3925, + "step": 2310 + }, + { + "epoch": 0.69, + "learning_rate": 3.6781068993099034e-05, + "loss": 1.4036, + "step": 2320 + }, + { + "epoch": 0.69, + "learning_rate": 3.667828549995255e-05, + "loss": 1.3986, + "step": 2330 + }, + { + "epoch": 0.69, + "learning_rate": 3.657524888596703e-05, + "loss": 1.4298, + "step": 2340 + }, + { + "epoch": 0.7, + "learning_rate": 3.6471961384408155e-05, + "loss": 1.4016, + "step": 2350 + }, + { + "epoch": 0.7, + "learning_rate": 3.636842523397945e-05, + "loss": 1.3992, + "step": 2360 + }, + { + "epoch": 0.7, + "learning_rate": 3.626464267877381e-05, + "loss": 1.4441, + "step": 2370 + }, + { + "epoch": 0.71, + "learning_rate": 3.616061596822478e-05, + "loss": 1.3967, + "step": 2380 + }, + { + "epoch": 0.71, + "learning_rate": 3.6056347357057893e-05, + "loss": 1.4252, + "step": 2390 + }, + { + "epoch": 0.71, + "learning_rate": 3.595183910524173e-05, + "loss": 1.4209, + "step": 2400 + }, + { + "epoch": 0.71, + "learning_rate": 3.5847093477938956e-05, + "loss": 1.4133, + "step": 2410 + }, + { + "epoch": 0.72, + "learning_rate": 3.5742112745457235e-05, + "loss": 1.4313, + "step": 2420 + }, + { + "epoch": 0.72, + "learning_rate": 3.563689918320002e-05, + "loss": 1.4275, + "step": 2430 + }, + { + "epoch": 0.72, + "learning_rate": 3.5531455071617226e-05, + "loss": 1.421, + "step": 2440 + }, + { + "epoch": 0.73, + "learning_rate": 3.542578269615579e-05, + "loss": 1.4402, + "step": 2450 + }, + { + "epoch": 0.73, + "learning_rate": 3.5319884347210186e-05, + "loss": 1.4176, + "step": 2460 + }, + { + "epoch": 0.73, + "learning_rate": 3.521376232007271e-05, + "loss": 1.4117, + "step": 2470 + }, + { + "epoch": 0.73, + "learning_rate": 3.5107418914883794e-05, + "loss": 1.41, + "step": 2480 + }, + { + "epoch": 0.74, + "learning_rate": 3.500085643658211e-05, + "loss": 1.4313, + "step": 2490 + }, + { + "epoch": 0.74, + "learning_rate": 3.489407719485464e-05, + "loss": 1.4035, + "step": 2500 + }, + { + "epoch": 0.74, + "learning_rate": 3.4787083504086605e-05, + "loss": 1.4057, + "step": 2510 + }, + { + "epoch": 0.75, + "learning_rate": 3.467987768331127e-05, + "loss": 1.4125, + "step": 2520 + }, + { + "epoch": 0.75, + "learning_rate": 3.457246205615974e-05, + "loss": 1.4056, + "step": 2530 + }, + { + "epoch": 0.75, + "learning_rate": 3.446483895081054e-05, + "loss": 1.4082, + "step": 2540 + }, + { + "epoch": 0.76, + "learning_rate": 3.4357010699939215e-05, + "loss": 1.3915, + "step": 2550 + }, + { + "epoch": 0.76, + "learning_rate": 3.424897964066769e-05, + "loss": 1.4012, + "step": 2560 + }, + { + "epoch": 0.76, + "learning_rate": 3.4140748114513685e-05, + "loss": 1.4251, + "step": 2570 + }, + { + "epoch": 0.76, + "learning_rate": 3.403231846733994e-05, + "loss": 1.4013, + "step": 2580 + }, + { + "epoch": 0.77, + "learning_rate": 3.392369304930334e-05, + "loss": 1.4076, + "step": 2590 + }, + { + "epoch": 0.77, + "learning_rate": 3.3814874214804034e-05, + "loss": 1.3978, + "step": 2600 + }, + { + "epoch": 0.77, + "learning_rate": 3.3705864322434354e-05, + "loss": 1.408, + "step": 2610 + }, + { + "epoch": 0.78, + "learning_rate": 3.359666573492772e-05, + "loss": 1.3888, + "step": 2620 + }, + { + "epoch": 0.78, + "learning_rate": 3.3487280819107415e-05, + "loss": 1.4052, + "step": 2630 + }, + { + "epoch": 0.78, + "learning_rate": 3.33777119458353e-05, + "loss": 1.4286, + "step": 2640 + }, + { + "epoch": 0.79, + "learning_rate": 3.326796148996042e-05, + "loss": 1.4241, + "step": 2650 + }, + { + "epoch": 0.79, + "learning_rate": 3.315803183026753e-05, + "loss": 1.4049, + "step": 2660 + }, + { + "epoch": 0.79, + "learning_rate": 3.304792534942553e-05, + "loss": 1.3826, + "step": 2670 + }, + { + "epoch": 0.79, + "learning_rate": 3.293764443393582e-05, + "loss": 1.413, + "step": 2680 + }, + { + "epoch": 0.8, + "learning_rate": 3.2827191474080605e-05, + "loss": 1.4161, + "step": 2690 + }, + { + "epoch": 0.8, + "learning_rate": 3.2716568863871044e-05, + "loss": 1.382, + "step": 2700 + }, + { + "epoch": 0.8, + "learning_rate": 3.260577900099539e-05, + "loss": 1.381, + "step": 2710 + }, + { + "epoch": 0.81, + "learning_rate": 3.2494824286767e-05, + "loss": 1.396, + "step": 2720 + }, + { + "epoch": 0.81, + "learning_rate": 3.2383707126072315e-05, + "loss": 1.3923, + "step": 2730 + }, + { + "epoch": 0.81, + "learning_rate": 3.2272429927318707e-05, + "loss": 1.4044, + "step": 2740 + }, + { + "epoch": 0.81, + "learning_rate": 3.21609951023823e-05, + "loss": 1.4073, + "step": 2750 + }, + { + "epoch": 0.82, + "learning_rate": 3.204940506655568e-05, + "loss": 1.4178, + "step": 2760 + }, + { + "epoch": 0.82, + "learning_rate": 3.1937662238495544e-05, + "loss": 1.4179, + "step": 2770 + }, + { + "epoch": 0.82, + "learning_rate": 3.1825769040170285e-05, + "loss": 1.4003, + "step": 2780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1713727896807505e-05, + "loss": 1.4176, + "step": 2790 + }, + { + "epoch": 0.83, + "learning_rate": 3.160154123684143e-05, + "loss": 1.4179, + "step": 2800 + }, + { + "epoch": 0.83, + "learning_rate": 3.1489211491860276e-05, + "loss": 1.4098, + "step": 2810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1376741096553576e-05, + "loss": 1.4087, + "step": 2820 + }, + { + "epoch": 0.84, + "learning_rate": 3.126413248865935e-05, + "loss": 1.3971, + "step": 2830 + }, + { + "epoch": 0.84, + "learning_rate": 3.115138810891134e-05, + "loss": 1.3915, + "step": 2840 + }, + { + "epoch": 0.84, + "learning_rate": 3.103851040098607e-05, + "loss": 1.4041, + "step": 2850 + }, + { + "epoch": 0.85, + "learning_rate": 3.0925501811449855e-05, + "loss": 1.4129, + "step": 2860 + }, + { + "epoch": 0.85, + "learning_rate": 3.081236478970583e-05, + "loss": 1.3948, + "step": 2870 + }, + { + "epoch": 0.85, + "learning_rate": 3.069910178794082e-05, + "loss": 1.4116, + "step": 2880 + }, + { + "epoch": 0.86, + "learning_rate": 3.0585715261072206e-05, + "loss": 1.4029, + "step": 2890 + }, + { + "epoch": 0.86, + "learning_rate": 3.04722076666947e-05, + "loss": 1.399, + "step": 2900 + }, + { + "epoch": 0.86, + "learning_rate": 3.0358581465027125e-05, + "loss": 1.4061, + "step": 2910 + }, + { + "epoch": 0.87, + "learning_rate": 3.024483911885901e-05, + "loss": 1.4152, + "step": 2920 + }, + { + "epoch": 0.87, + "learning_rate": 3.013098309349729e-05, + "loss": 1.4257, + "step": 2930 + }, + { + "epoch": 0.87, + "learning_rate": 3.0017015856712814e-05, + "loss": 1.417, + "step": 2940 + }, + { + "epoch": 0.87, + "learning_rate": 2.9902939878686915e-05, + "loss": 1.3952, + "step": 2950 + }, + { + "epoch": 0.88, + "learning_rate": 2.978875763195779e-05, + "loss": 1.4252, + "step": 2960 + }, + { + "epoch": 0.88, + "learning_rate": 2.9674471591367005e-05, + "loss": 1.3982, + "step": 2970 + }, + { + "epoch": 0.88, + "learning_rate": 2.9560084234005765e-05, + "loss": 1.3948, + "step": 2980 + }, + { + "epoch": 0.89, + "learning_rate": 2.944559803916128e-05, + "loss": 1.4127, + "step": 2990 + }, + { + "epoch": 0.89, + "learning_rate": 2.9331015488263024e-05, + "loss": 1.4239, + "step": 3000 + } + ], + "max_steps": 6748, + "num_train_epochs": 2, + "total_flos": 5.192765691303821e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..406ab5b628f223bfcd63d70185fb1bc0973e19c4 --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77681af64e1f04ae2b28b063de632629c209cd2338ce2449c3e014f309b6088a +size 3298 diff --git a/checkpoint-4000/README.md b/checkpoint-4000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-4000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-4000/adapter_config.json b/checkpoint-4000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a092508f49b7debf23eb22091bdac4ac1daa62a9 --- /dev/null +++ b/checkpoint-4000/adapter_config.json @@ -0,0 +1,20 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "../Baichuan-13B-Chat", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "W_pack" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4000/adapter_model.bin b/checkpoint-4000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c72a8cd27aae736f7087c4d7bda6665fd05192c --- /dev/null +++ b/checkpoint-4000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:305d4c0d5632b25d66157a46bc462ddb23c78c9bfb4e2d393438bc9ab3d12ab9 +size 26241825 diff --git a/checkpoint-4000/finetuning_args.json b/checkpoint-4000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..fafc24bcd05e0bda0201b5a7198b067dab53f435 --- /dev/null +++ b/checkpoint-4000/finetuning_args.json @@ -0,0 +1,12 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "W_pack" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3b9aebda5d40281e09e9651fbab5f9bb6a406d2b --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fc6413dfa3ba285641547498a9e196bdcf12f8ec5290a88c0fff4365c597dc9d +size 52496005 diff --git a/checkpoint-4000/rng_state_0.pth b/checkpoint-4000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..194e1d4be83e42df62b0bf72fc4b96c7a40cc449 --- /dev/null +++ b/checkpoint-4000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1e9fd3480c82b64c0212131684fa4a9ab9565dcc212131844b65693ba91eb3ce +size 18679 diff --git a/checkpoint-4000/rng_state_1.pth b/checkpoint-4000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..410e5f2d118ef825890223d88f60c26c5e9d3b40 --- /dev/null +++ b/checkpoint-4000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ec60c0398d39d6da5ef9df46717920fa6bf96d0317598638a9f8871509e44be +size 18679 diff --git a/checkpoint-4000/rng_state_2.pth b/checkpoint-4000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..df6b11b7ec89479d89ed3b5f98d79ad8715c0a37 --- /dev/null +++ b/checkpoint-4000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7db5113e746717c9beadfbf77f549bd629d61088d3b5c40f1615d14f04946ee +size 18679 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b325759dae8896ba716a6950cff17e7d5d0140ae --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:75ee71e2c80b8abe77d511b7a781a32530b3ad6f89f1e93aed1c39ddd3736e12 +size 627 diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..44d258883787fb1d733e586493392dc1586c03cb --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,2416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1852729831839395, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999729068921297e-05, + "loss": 1.8898, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998916281557476e-05, + "loss": 1.7273, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999756165552527e-05, + "loss": 1.6799, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566522018553e-05, + "loss": 1.6431, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322701664249e-05, + "loss": 1.6153, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990247097742984e-05, + "loss": 1.5933, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.9986725528075205e-05, + "loss": 1.5913, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.998266238396737e-05, + "loss": 1.5434, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 4.997805775348605e-05, + "loss": 1.5304, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 4.997291173643424e-05, + "loss": 1.5531, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 4.996722444434921e-05, + "loss": 1.5446, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 4.99609960005001e-05, + "loss": 1.5352, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 4.995422653988524e-05, + "loss": 1.5303, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 4.994691620922919e-05, + "loss": 1.5449, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 4.993906516697964e-05, + "loss": 1.5114, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9930673583303865e-05, + "loss": 1.5043, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 4.992174164008515e-05, + "loss": 1.5476, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 4.991226953091877e-05, + "loss": 1.5107, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 4.9902257461107824e-05, + "loss": 1.5104, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 4.9891705647658795e-05, + "loss": 1.5298, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 4.988061431927681e-05, + "loss": 1.4907, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 4.986898371636071e-05, + "loss": 1.5127, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 4.985681409099784e-05, + "loss": 1.5037, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 4.984410570695858e-05, + "loss": 1.5029, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 4.983085883969063e-05, + "loss": 1.4725, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 4.981707377631303e-05, + "loss": 1.5148, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 4.9802750815609936e-05, + "loss": 1.4993, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 4.978789026802419e-05, + "loss": 1.5006, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 4.9772492455650494e-05, + "loss": 1.4885, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 4.975655771222855e-05, + "loss": 1.4898, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 4.9740086383135706e-05, + "loss": 1.4906, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 4.97230788253796e-05, + "loss": 1.4796, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 4.970553540759028e-05, + "loss": 1.4861, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 4.968745651001231e-05, + "loss": 1.4827, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9668842524496526e-05, + "loss": 1.4884, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 4.964969385449149e-05, + "loss": 1.4873, + "step": 360 + }, + { + "epoch": 0.11, + "learning_rate": 4.96300109150348e-05, + "loss": 1.4848, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 4.960979413274404e-05, + "loss": 1.4881, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 4.9589043945807594e-05, + "loss": 1.4618, + "step": 390 + }, + { + "epoch": 0.12, + "learning_rate": 4.9567760803975105e-05, + "loss": 1.4858, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 4.954594516854773e-05, + "loss": 1.4777, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 4.952359751236817e-05, + "loss": 1.4828, + "step": 420 + }, + { + "epoch": 0.13, + "learning_rate": 4.950071831981038e-05, + "loss": 1.4571, + "step": 430 + }, + { + "epoch": 0.13, + "learning_rate": 4.9477308086769117e-05, + "loss": 1.4724, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 4.945336732064915e-05, + "loss": 1.4771, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 4.9428896540354294e-05, + "loss": 1.4604, + "step": 460 + }, + { + "epoch": 0.14, + "learning_rate": 4.940389627627613e-05, + "loss": 1.4815, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 4.937836707028255e-05, + "loss": 1.4859, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 4.935230947570597e-05, + "loss": 1.4715, + "step": 490 + }, + { + "epoch": 0.15, + "learning_rate": 4.932572405733137e-05, + "loss": 1.4759, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 4.929861139138404e-05, + "loss": 1.4678, + "step": 510 + }, + { + "epoch": 0.15, + "learning_rate": 4.9270972065517083e-05, + "loss": 1.4754, + "step": 520 + }, + { + "epoch": 0.16, + "learning_rate": 4.924280667879869e-05, + "loss": 1.462, + "step": 530 + }, + { + "epoch": 0.16, + "learning_rate": 4.921411584169915e-05, + "loss": 1.4704, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 4.918490017607761e-05, + "loss": 1.4661, + "step": 550 + }, + { + "epoch": 0.17, + "learning_rate": 4.915516031516863e-05, + "loss": 1.471, + "step": 560 + }, + { + "epoch": 0.17, + "learning_rate": 4.912489690356841e-05, + "loss": 1.451, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 4.909411059722084e-05, + "loss": 1.4411, + "step": 580 + }, + { + "epoch": 0.17, + "learning_rate": 4.9062802063403316e-05, + "loss": 1.456, + "step": 590 + }, + { + "epoch": 0.18, + "learning_rate": 4.90309719807122e-05, + "loss": 1.4678, + "step": 600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8998621039048205e-05, + "loss": 1.479, + "step": 610 + }, + { + "epoch": 0.18, + "learning_rate": 4.896574993960136e-05, + "loss": 1.4471, + "step": 620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893235939483587e-05, + "loss": 1.453, + "step": 630 + }, + { + "epoch": 0.19, + "learning_rate": 4.8898450128474626e-05, + "loss": 1.4696, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 4.886402287548357e-05, + "loss": 1.4526, + "step": 650 + }, + { + "epoch": 0.2, + "learning_rate": 4.8829078382055725e-05, + "loss": 1.4429, + "step": 660 + }, + { + "epoch": 0.2, + "learning_rate": 4.8793617405595025e-05, + "loss": 1.4491, + "step": 670 + }, + { + "epoch": 0.2, + "learning_rate": 4.8757640714699924e-05, + "loss": 1.4411, + "step": 680 + }, + { + "epoch": 0.2, + "learning_rate": 4.872114908914671e-05, + "loss": 1.4543, + "step": 690 + }, + { + "epoch": 0.21, + "learning_rate": 4.8684143319872636e-05, + "loss": 1.4556, + "step": 700 + }, + { + "epoch": 0.21, + "learning_rate": 4.864662420895873e-05, + "loss": 1.4506, + "step": 710 + }, + { + "epoch": 0.21, + "learning_rate": 4.860859256961244e-05, + "loss": 1.4671, + "step": 720 + }, + { + "epoch": 0.22, + "learning_rate": 4.857004922615002e-05, + "loss": 1.4469, + "step": 730 + }, + { + "epoch": 0.22, + "learning_rate": 4.8530995013978645e-05, + "loss": 1.4554, + "step": 740 + }, + { + "epoch": 0.22, + "learning_rate": 4.84914307795783e-05, + "loss": 1.4671, + "step": 750 + }, + { + "epoch": 0.23, + "learning_rate": 4.845135738048343e-05, + "loss": 1.445, + "step": 760 + }, + { + "epoch": 0.23, + "learning_rate": 4.841077568526439e-05, + "loss": 1.4469, + "step": 770 + }, + { + "epoch": 0.23, + "learning_rate": 4.836968657350857e-05, + "loss": 1.4677, + "step": 780 + }, + { + "epoch": 0.23, + "learning_rate": 4.832809093580135e-05, + "loss": 1.4653, + "step": 790 + }, + { + "epoch": 0.24, + "learning_rate": 4.8285989673706826e-05, + "loss": 1.4342, + "step": 800 + }, + { + "epoch": 0.24, + "learning_rate": 4.824338369974822e-05, + "loss": 1.458, + "step": 810 + }, + { + "epoch": 0.24, + "learning_rate": 4.8200273937388126e-05, + "loss": 1.4541, + "step": 820 + }, + { + "epoch": 0.25, + "learning_rate": 4.81566613210085e-05, + "loss": 1.4324, + "step": 830 + }, + { + "epoch": 0.25, + "learning_rate": 4.81125467958904e-05, + "loss": 1.4405, + "step": 840 + }, + { + "epoch": 0.25, + "learning_rate": 4.80679313181935e-05, + "loss": 1.4408, + "step": 850 + }, + { + "epoch": 0.25, + "learning_rate": 4.8022815854935356e-05, + "loss": 1.4395, + "step": 860 + }, + { + "epoch": 0.26, + "learning_rate": 4.797720138397045e-05, + "loss": 1.4359, + "step": 870 + }, + { + "epoch": 0.26, + "learning_rate": 4.793108889396902e-05, + "loss": 1.442, + "step": 880 + }, + { + "epoch": 0.26, + "learning_rate": 4.7884479384395594e-05, + "loss": 1.4566, + "step": 890 + }, + { + "epoch": 0.27, + "learning_rate": 4.7837373865487345e-05, + "loss": 1.4257, + "step": 900 + }, + { + "epoch": 0.27, + "learning_rate": 4.77897733582322e-05, + "loss": 1.4755, + "step": 910 + }, + { + "epoch": 0.27, + "learning_rate": 4.774167889434671e-05, + "loss": 1.4476, + "step": 920 + }, + { + "epoch": 0.28, + "learning_rate": 4.769309151625366e-05, + "loss": 1.4531, + "step": 930 + }, + { + "epoch": 0.28, + "learning_rate": 4.7644012277059516e-05, + "loss": 1.447, + "step": 940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7594442240531574e-05, + "loss": 1.4201, + "step": 950 + }, + { + "epoch": 0.28, + "learning_rate": 4.754438248107491e-05, + "loss": 1.4323, + "step": 960 + }, + { + "epoch": 0.29, + "learning_rate": 4.7493834083709104e-05, + "loss": 1.4432, + "step": 970 + }, + { + "epoch": 0.29, + "learning_rate": 4.7442798144044695e-05, + "loss": 1.4339, + "step": 980 + }, + { + "epoch": 0.29, + "learning_rate": 4.739127576825945e-05, + "loss": 1.4477, + "step": 990 + }, + { + "epoch": 0.3, + "learning_rate": 4.733926807307441e-05, + "loss": 1.4242, + "step": 1000 + }, + { + "epoch": 0.3, + "learning_rate": 4.728677618572965e-05, + "loss": 1.4341, + "step": 1010 + }, + { + "epoch": 0.3, + "learning_rate": 4.723380124395985e-05, + "loss": 1.4526, + "step": 1020 + }, + { + "epoch": 0.31, + "learning_rate": 4.7180344395969675e-05, + "loss": 1.4402, + "step": 1030 + }, + { + "epoch": 0.31, + "learning_rate": 4.712640680040884e-05, + "loss": 1.4257, + "step": 1040 + }, + { + "epoch": 0.31, + "learning_rate": 4.707198962634701e-05, + "loss": 1.4232, + "step": 1050 + }, + { + "epoch": 0.31, + "learning_rate": 4.70170940532485e-05, + "loss": 1.4485, + "step": 1060 + }, + { + "epoch": 0.32, + "learning_rate": 4.6961721270946635e-05, + "loss": 1.456, + "step": 1070 + }, + { + "epoch": 0.32, + "learning_rate": 4.690587247961804e-05, + "loss": 1.4555, + "step": 1080 + }, + { + "epoch": 0.32, + "learning_rate": 4.684954888975657e-05, + "loss": 1.4376, + "step": 1090 + }, + { + "epoch": 0.33, + "learning_rate": 4.6792751722147104e-05, + "loss": 1.4353, + "step": 1100 + }, + { + "epoch": 0.33, + "learning_rate": 4.6735482207839074e-05, + "loss": 1.4226, + "step": 1110 + }, + { + "epoch": 0.33, + "learning_rate": 4.6677741588119784e-05, + "loss": 1.4315, + "step": 1120 + }, + { + "epoch": 0.33, + "learning_rate": 4.66195311144875e-05, + "loss": 1.4303, + "step": 1130 + }, + { + "epoch": 0.34, + "learning_rate": 4.6560852048624345e-05, + "loss": 1.4288, + "step": 1140 + }, + { + "epoch": 0.34, + "learning_rate": 4.650170566236892e-05, + "loss": 1.4539, + "step": 1150 + }, + { + "epoch": 0.34, + "learning_rate": 4.6442093237688756e-05, + "loss": 1.4527, + "step": 1160 + }, + { + "epoch": 0.35, + "learning_rate": 4.6382016066652556e-05, + "loss": 1.4406, + "step": 1170 + }, + { + "epoch": 0.35, + "learning_rate": 4.632147545140212e-05, + "loss": 1.4233, + "step": 1180 + }, + { + "epoch": 0.35, + "learning_rate": 4.626047270412419e-05, + "loss": 1.426, + "step": 1190 + }, + { + "epoch": 0.36, + "learning_rate": 4.619900914702198e-05, + "loss": 1.4577, + "step": 1200 + }, + { + "epoch": 0.36, + "learning_rate": 4.613708611228652e-05, + "loss": 1.4313, + "step": 1210 + }, + { + "epoch": 0.36, + "learning_rate": 4.607470494206776e-05, + "loss": 1.4129, + "step": 1220 + }, + { + "epoch": 0.36, + "learning_rate": 4.601186698844554e-05, + "loss": 1.4368, + "step": 1230 + }, + { + "epoch": 0.37, + "learning_rate": 4.594857361340021e-05, + "loss": 1.4342, + "step": 1240 + }, + { + "epoch": 0.37, + "learning_rate": 4.588482618878316e-05, + "loss": 1.4438, + "step": 1250 + }, + { + "epoch": 0.37, + "learning_rate": 4.582062609628709e-05, + "loss": 1.4263, + "step": 1260 + }, + { + "epoch": 0.38, + "learning_rate": 4.575597472741601e-05, + "loss": 1.4379, + "step": 1270 + }, + { + "epoch": 0.38, + "learning_rate": 4.569087348345512e-05, + "loss": 1.4221, + "step": 1280 + }, + { + "epoch": 0.38, + "learning_rate": 4.562532377544046e-05, + "loss": 1.4414, + "step": 1290 + }, + { + "epoch": 0.39, + "learning_rate": 4.5559327024128265e-05, + "loss": 1.4395, + "step": 1300 + }, + { + "epoch": 0.39, + "learning_rate": 4.549288465996421e-05, + "loss": 1.4278, + "step": 1310 + }, + { + "epoch": 0.39, + "learning_rate": 4.542599812305243e-05, + "loss": 1.4344, + "step": 1320 + }, + { + "epoch": 0.39, + "learning_rate": 4.535866886312423e-05, + "loss": 1.4352, + "step": 1330 + }, + { + "epoch": 0.4, + "learning_rate": 4.529089833950675e-05, + "loss": 1.4133, + "step": 1340 + }, + { + "epoch": 0.4, + "learning_rate": 4.5222688021091266e-05, + "loss": 1.4506, + "step": 1350 + }, + { + "epoch": 0.4, + "learning_rate": 4.5154039386301385e-05, + "loss": 1.4295, + "step": 1360 + }, + { + "epoch": 0.41, + "learning_rate": 4.5084953923061016e-05, + "loss": 1.4389, + "step": 1370 + }, + { + "epoch": 0.41, + "learning_rate": 4.5015433128762065e-05, + "loss": 1.4247, + "step": 1380 + }, + { + "epoch": 0.41, + "learning_rate": 4.494547851023205e-05, + "loss": 1.4347, + "step": 1390 + }, + { + "epoch": 0.41, + "learning_rate": 4.487509158370139e-05, + "loss": 1.4133, + "step": 1400 + }, + { + "epoch": 0.42, + "learning_rate": 4.480427387477056e-05, + "loss": 1.4296, + "step": 1410 + }, + { + "epoch": 0.42, + "learning_rate": 4.473302691837702e-05, + "loss": 1.4353, + "step": 1420 + }, + { + "epoch": 0.42, + "learning_rate": 4.466135225876194e-05, + "loss": 1.4377, + "step": 1430 + }, + { + "epoch": 0.43, + "learning_rate": 4.458925144943676e-05, + "loss": 1.4168, + "step": 1440 + }, + { + "epoch": 0.43, + "learning_rate": 4.451672605314948e-05, + "loss": 1.4334, + "step": 1450 + }, + { + "epoch": 0.43, + "learning_rate": 4.444377764185082e-05, + "loss": 1.44, + "step": 1460 + }, + { + "epoch": 0.44, + "learning_rate": 4.43704077966601e-05, + "loss": 1.4375, + "step": 1470 + }, + { + "epoch": 0.44, + "learning_rate": 4.4296618107831036e-05, + "loss": 1.447, + "step": 1480 + }, + { + "epoch": 0.44, + "learning_rate": 4.422241017471722e-05, + "loss": 1.4151, + "step": 1490 + }, + { + "epoch": 0.44, + "learning_rate": 4.414778560573749e-05, + "loss": 1.4388, + "step": 1500 + }, + { + "epoch": 0.45, + "learning_rate": 4.4072746018341036e-05, + "loss": 1.4228, + "step": 1510 + }, + { + "epoch": 0.45, + "learning_rate": 4.399729303897238e-05, + "loss": 1.4104, + "step": 1520 + }, + { + "epoch": 0.45, + "learning_rate": 4.392142830303608e-05, + "loss": 1.4441, + "step": 1530 + }, + { + "epoch": 0.46, + "learning_rate": 4.384515345486131e-05, + "loss": 1.4282, + "step": 1540 + }, + { + "epoch": 0.46, + "learning_rate": 4.376847014766623e-05, + "loss": 1.4271, + "step": 1550 + }, + { + "epoch": 0.46, + "learning_rate": 4.369138004352212e-05, + "loss": 1.4223, + "step": 1560 + }, + { + "epoch": 0.47, + "learning_rate": 4.3613884813317406e-05, + "loss": 1.425, + "step": 1570 + }, + { + "epoch": 0.47, + "learning_rate": 4.3535986136721377e-05, + "loss": 1.4392, + "step": 1580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3457685702147834e-05, + "loss": 1.4097, + "step": 1590 + }, + { + "epoch": 0.47, + "learning_rate": 4.3378985206718484e-05, + "loss": 1.4405, + "step": 1600 + }, + { + "epoch": 0.48, + "learning_rate": 4.329988635622611e-05, + "loss": 1.4311, + "step": 1610 + }, + { + "epoch": 0.48, + "learning_rate": 4.322039086509769e-05, + "loss": 1.4358, + "step": 1620 + }, + { + "epoch": 0.48, + "learning_rate": 4.3140500456357145e-05, + "loss": 1.4114, + "step": 1630 + }, + { + "epoch": 0.49, + "learning_rate": 4.306021686158805e-05, + "loss": 1.4165, + "step": 1640 + }, + { + "epoch": 0.49, + "learning_rate": 4.297954182089609e-05, + "loss": 1.4309, + "step": 1650 + }, + { + "epoch": 0.49, + "learning_rate": 4.289847708287129e-05, + "loss": 1.4215, + "step": 1660 + }, + { + "epoch": 0.49, + "learning_rate": 4.2817024404550246e-05, + "loss": 1.4124, + "step": 1670 + }, + { + "epoch": 0.5, + "learning_rate": 4.2735185551377895e-05, + "loss": 1.4001, + "step": 1680 + }, + { + "epoch": 0.5, + "learning_rate": 4.265296229716935e-05, + "loss": 1.4302, + "step": 1690 + }, + { + "epoch": 0.5, + "learning_rate": 4.25703564240714e-05, + "loss": 1.4211, + "step": 1700 + }, + { + "epoch": 0.51, + "learning_rate": 4.2487369722523906e-05, + "loss": 1.4423, + "step": 1710 + }, + { + "epoch": 0.51, + "learning_rate": 4.240400399122101e-05, + "loss": 1.4299, + "step": 1720 + }, + { + "epoch": 0.51, + "learning_rate": 4.232026103707209e-05, + "loss": 1.4214, + "step": 1730 + }, + { + "epoch": 0.52, + "learning_rate": 4.223614267516268e-05, + "loss": 1.4348, + "step": 1740 + }, + { + "epoch": 0.52, + "learning_rate": 4.215165072871505e-05, + "loss": 1.4315, + "step": 1750 + }, + { + "epoch": 0.52, + "learning_rate": 4.206678702904874e-05, + "loss": 1.4098, + "step": 1760 + }, + { + "epoch": 0.52, + "learning_rate": 4.198155341554084e-05, + "loss": 1.4242, + "step": 1770 + }, + { + "epoch": 0.53, + "learning_rate": 4.1895951735586145e-05, + "loss": 1.4272, + "step": 1780 + }, + { + "epoch": 0.53, + "learning_rate": 4.1809983844557085e-05, + "loss": 1.4452, + "step": 1790 + }, + { + "epoch": 0.53, + "learning_rate": 4.172365160576355e-05, + "loss": 1.431, + "step": 1800 + }, + { + "epoch": 0.54, + "learning_rate": 4.163695689041245e-05, + "loss": 1.4389, + "step": 1810 + }, + { + "epoch": 0.54, + "learning_rate": 4.154990157756722e-05, + "loss": 1.413, + "step": 1820 + }, + { + "epoch": 0.54, + "learning_rate": 4.1462487554107036e-05, + "loss": 1.3893, + "step": 1830 + }, + { + "epoch": 0.55, + "learning_rate": 4.137471671468596e-05, + "loss": 1.4052, + "step": 1840 + }, + { + "epoch": 0.55, + "learning_rate": 4.128659096169183e-05, + "loss": 1.4173, + "step": 1850 + }, + { + "epoch": 0.55, + "learning_rate": 4.1198112205205096e-05, + "loss": 1.4012, + "step": 1860 + }, + { + "epoch": 0.55, + "learning_rate": 4.110928236295734e-05, + "loss": 1.4119, + "step": 1870 + }, + { + "epoch": 0.56, + "learning_rate": 4.102010336028975e-05, + "loss": 1.4111, + "step": 1880 + }, + { + "epoch": 0.56, + "learning_rate": 4.0930577130111424e-05, + "loss": 1.4156, + "step": 1890 + }, + { + "epoch": 0.56, + "learning_rate": 4.084070561285739e-05, + "loss": 1.4419, + "step": 1900 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750490756446624e-05, + "loss": 1.4121, + "step": 1910 + }, + { + "epoch": 0.57, + "learning_rate": 4.0659934516239795e-05, + "loss": 1.4204, + "step": 1920 + }, + { + "epoch": 0.57, + "learning_rate": 4.056903885499689e-05, + "loss": 1.4032, + "step": 1930 + }, + { + "epoch": 0.57, + "learning_rate": 4.047780574283466e-05, + "loss": 1.4207, + "step": 1940 + }, + { + "epoch": 0.58, + "learning_rate": 4.038623715718397e-05, + "loss": 1.4095, + "step": 1950 + }, + { + "epoch": 0.58, + "learning_rate": 4.029433508274686e-05, + "loss": 1.4228, + "step": 1960 + }, + { + "epoch": 0.58, + "learning_rate": 4.0202101511453586e-05, + "loss": 1.4141, + "step": 1970 + }, + { + "epoch": 0.59, + "learning_rate": 4.010953844241943e-05, + "loss": 1.4323, + "step": 1980 + }, + { + "epoch": 0.59, + "learning_rate": 4.001664788190135e-05, + "loss": 1.4087, + "step": 1990 + }, + { + "epoch": 0.59, + "learning_rate": 3.992343184325453e-05, + "loss": 1.4186, + "step": 2000 + }, + { + "epoch": 0.6, + "learning_rate": 3.982989234688873e-05, + "loss": 1.4264, + "step": 2010 + }, + { + "epoch": 0.6, + "learning_rate": 3.973603142022448e-05, + "loss": 1.4417, + "step": 2020 + }, + { + "epoch": 0.6, + "learning_rate": 3.964185109764915e-05, + "loss": 1.4075, + "step": 2030 + }, + { + "epoch": 0.6, + "learning_rate": 3.954735342047285e-05, + "loss": 1.4143, + "step": 2040 + }, + { + "epoch": 0.61, + "learning_rate": 3.945254043688419e-05, + "loss": 1.4176, + "step": 2050 + }, + { + "epoch": 0.61, + "learning_rate": 3.935741420190587e-05, + "loss": 1.4214, + "step": 2060 + }, + { + "epoch": 0.61, + "learning_rate": 3.926197677735018e-05, + "loss": 1.4256, + "step": 2070 + }, + { + "epoch": 0.62, + "learning_rate": 3.9166230231774276e-05, + "loss": 1.4075, + "step": 2080 + }, + { + "epoch": 0.62, + "learning_rate": 3.9070176640435335e-05, + "loss": 1.3887, + "step": 2090 + }, + { + "epoch": 0.62, + "learning_rate": 3.897381808524562e-05, + "loss": 1.4225, + "step": 2100 + }, + { + "epoch": 0.63, + "learning_rate": 3.887715665472729e-05, + "loss": 1.4114, + "step": 2110 + }, + { + "epoch": 0.63, + "learning_rate": 3.8780194443967226e-05, + "loss": 1.4316, + "step": 2120 + }, + { + "epoch": 0.63, + "learning_rate": 3.8682933554571524e-05, + "loss": 1.4168, + "step": 2130 + }, + { + "epoch": 0.63, + "learning_rate": 3.858537609461999e-05, + "loss": 1.4237, + "step": 2140 + }, + { + "epoch": 0.64, + "learning_rate": 3.8487524178620464e-05, + "loss": 1.4373, + "step": 2150 + }, + { + "epoch": 0.64, + "learning_rate": 3.838937992746295e-05, + "loss": 1.4089, + "step": 2160 + }, + { + "epoch": 0.64, + "learning_rate": 3.8290945468373684e-05, + "loss": 1.4319, + "step": 2170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8192222934869e-05, + "loss": 1.4035, + "step": 2180 + }, + { + "epoch": 0.65, + "learning_rate": 3.809321446670909e-05, + "loss": 1.4161, + "step": 2190 + }, + { + "epoch": 0.65, + "learning_rate": 3.799392220985164e-05, + "loss": 1.4136, + "step": 2200 + }, + { + "epoch": 0.65, + "learning_rate": 3.789434831640533e-05, + "loss": 1.4188, + "step": 2210 + }, + { + "epoch": 0.66, + "learning_rate": 3.779449494458312e-05, + "loss": 1.4203, + "step": 2220 + }, + { + "epoch": 0.66, + "learning_rate": 3.769436425865557e-05, + "loss": 1.4263, + "step": 2230 + }, + { + "epoch": 0.66, + "learning_rate": 3.759395842890384e-05, + "loss": 1.4295, + "step": 2240 + }, + { + "epoch": 0.67, + "learning_rate": 3.749327963157274e-05, + "loss": 1.4144, + "step": 2250 + }, + { + "epoch": 0.67, + "learning_rate": 3.739233004882346e-05, + "loss": 1.4162, + "step": 2260 + }, + { + "epoch": 0.67, + "learning_rate": 3.729111186868635e-05, + "loss": 1.4099, + "step": 2270 + }, + { + "epoch": 0.68, + "learning_rate": 3.718962728501348e-05, + "loss": 1.3878, + "step": 2280 + }, + { + "epoch": 0.68, + "learning_rate": 3.708787849743106e-05, + "loss": 1.4399, + "step": 2290 + }, + { + "epoch": 0.68, + "learning_rate": 3.69858677112918e-05, + "loss": 1.4249, + "step": 2300 + }, + { + "epoch": 0.68, + "learning_rate": 3.688359713762707e-05, + "loss": 1.3925, + "step": 2310 + }, + { + "epoch": 0.69, + "learning_rate": 3.6781068993099034e-05, + "loss": 1.4036, + "step": 2320 + }, + { + "epoch": 0.69, + "learning_rate": 3.667828549995255e-05, + "loss": 1.3986, + "step": 2330 + }, + { + "epoch": 0.69, + "learning_rate": 3.657524888596703e-05, + "loss": 1.4298, + "step": 2340 + }, + { + "epoch": 0.7, + "learning_rate": 3.6471961384408155e-05, + "loss": 1.4016, + "step": 2350 + }, + { + "epoch": 0.7, + "learning_rate": 3.636842523397945e-05, + "loss": 1.3992, + "step": 2360 + }, + { + "epoch": 0.7, + "learning_rate": 3.626464267877381e-05, + "loss": 1.4441, + "step": 2370 + }, + { + "epoch": 0.71, + "learning_rate": 3.616061596822478e-05, + "loss": 1.3967, + "step": 2380 + }, + { + "epoch": 0.71, + "learning_rate": 3.6056347357057893e-05, + "loss": 1.4252, + "step": 2390 + }, + { + "epoch": 0.71, + "learning_rate": 3.595183910524173e-05, + "loss": 1.4209, + "step": 2400 + }, + { + "epoch": 0.71, + "learning_rate": 3.5847093477938956e-05, + "loss": 1.4133, + "step": 2410 + }, + { + "epoch": 0.72, + "learning_rate": 3.5742112745457235e-05, + "loss": 1.4313, + "step": 2420 + }, + { + "epoch": 0.72, + "learning_rate": 3.563689918320002e-05, + "loss": 1.4275, + "step": 2430 + }, + { + "epoch": 0.72, + "learning_rate": 3.5531455071617226e-05, + "loss": 1.421, + "step": 2440 + }, + { + "epoch": 0.73, + "learning_rate": 3.542578269615579e-05, + "loss": 1.4402, + "step": 2450 + }, + { + "epoch": 0.73, + "learning_rate": 3.5319884347210186e-05, + "loss": 1.4176, + "step": 2460 + }, + { + "epoch": 0.73, + "learning_rate": 3.521376232007271e-05, + "loss": 1.4117, + "step": 2470 + }, + { + "epoch": 0.73, + "learning_rate": 3.5107418914883794e-05, + "loss": 1.41, + "step": 2480 + }, + { + "epoch": 0.74, + "learning_rate": 3.500085643658211e-05, + "loss": 1.4313, + "step": 2490 + }, + { + "epoch": 0.74, + "learning_rate": 3.489407719485464e-05, + "loss": 1.4035, + "step": 2500 + }, + { + "epoch": 0.74, + "learning_rate": 3.4787083504086605e-05, + "loss": 1.4057, + "step": 2510 + }, + { + "epoch": 0.75, + "learning_rate": 3.467987768331127e-05, + "loss": 1.4125, + "step": 2520 + }, + { + "epoch": 0.75, + "learning_rate": 3.457246205615974e-05, + "loss": 1.4056, + "step": 2530 + }, + { + "epoch": 0.75, + "learning_rate": 3.446483895081054e-05, + "loss": 1.4082, + "step": 2540 + }, + { + "epoch": 0.76, + "learning_rate": 3.4357010699939215e-05, + "loss": 1.3915, + "step": 2550 + }, + { + "epoch": 0.76, + "learning_rate": 3.424897964066769e-05, + "loss": 1.4012, + "step": 2560 + }, + { + "epoch": 0.76, + "learning_rate": 3.4140748114513685e-05, + "loss": 1.4251, + "step": 2570 + }, + { + "epoch": 0.76, + "learning_rate": 3.403231846733994e-05, + "loss": 1.4013, + "step": 2580 + }, + { + "epoch": 0.77, + "learning_rate": 3.392369304930334e-05, + "loss": 1.4076, + "step": 2590 + }, + { + "epoch": 0.77, + "learning_rate": 3.3814874214804034e-05, + "loss": 1.3978, + "step": 2600 + }, + { + "epoch": 0.77, + "learning_rate": 3.3705864322434354e-05, + "loss": 1.408, + "step": 2610 + }, + { + "epoch": 0.78, + "learning_rate": 3.359666573492772e-05, + "loss": 1.3888, + "step": 2620 + }, + { + "epoch": 0.78, + "learning_rate": 3.3487280819107415e-05, + "loss": 1.4052, + "step": 2630 + }, + { + "epoch": 0.78, + "learning_rate": 3.33777119458353e-05, + "loss": 1.4286, + "step": 2640 + }, + { + "epoch": 0.79, + "learning_rate": 3.326796148996042e-05, + "loss": 1.4241, + "step": 2650 + }, + { + "epoch": 0.79, + "learning_rate": 3.315803183026753e-05, + "loss": 1.4049, + "step": 2660 + }, + { + "epoch": 0.79, + "learning_rate": 3.304792534942553e-05, + "loss": 1.3826, + "step": 2670 + }, + { + "epoch": 0.79, + "learning_rate": 3.293764443393582e-05, + "loss": 1.413, + "step": 2680 + }, + { + "epoch": 0.8, + "learning_rate": 3.2827191474080605e-05, + "loss": 1.4161, + "step": 2690 + }, + { + "epoch": 0.8, + "learning_rate": 3.2716568863871044e-05, + "loss": 1.382, + "step": 2700 + }, + { + "epoch": 0.8, + "learning_rate": 3.260577900099539e-05, + "loss": 1.381, + "step": 2710 + }, + { + "epoch": 0.81, + "learning_rate": 3.2494824286767e-05, + "loss": 1.396, + "step": 2720 + }, + { + "epoch": 0.81, + "learning_rate": 3.2383707126072315e-05, + "loss": 1.3923, + "step": 2730 + }, + { + "epoch": 0.81, + "learning_rate": 3.2272429927318707e-05, + "loss": 1.4044, + "step": 2740 + }, + { + "epoch": 0.81, + "learning_rate": 3.21609951023823e-05, + "loss": 1.4073, + "step": 2750 + }, + { + "epoch": 0.82, + "learning_rate": 3.204940506655568e-05, + "loss": 1.4178, + "step": 2760 + }, + { + "epoch": 0.82, + "learning_rate": 3.1937662238495544e-05, + "loss": 1.4179, + "step": 2770 + }, + { + "epoch": 0.82, + "learning_rate": 3.1825769040170285e-05, + "loss": 1.4003, + "step": 2780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1713727896807505e-05, + "loss": 1.4176, + "step": 2790 + }, + { + "epoch": 0.83, + "learning_rate": 3.160154123684143e-05, + "loss": 1.4179, + "step": 2800 + }, + { + "epoch": 0.83, + "learning_rate": 3.1489211491860276e-05, + "loss": 1.4098, + "step": 2810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1376741096553576e-05, + "loss": 1.4087, + "step": 2820 + }, + { + "epoch": 0.84, + "learning_rate": 3.126413248865935e-05, + "loss": 1.3971, + "step": 2830 + }, + { + "epoch": 0.84, + "learning_rate": 3.115138810891134e-05, + "loss": 1.3915, + "step": 2840 + }, + { + "epoch": 0.84, + "learning_rate": 3.103851040098607e-05, + "loss": 1.4041, + "step": 2850 + }, + { + "epoch": 0.85, + "learning_rate": 3.0925501811449855e-05, + "loss": 1.4129, + "step": 2860 + }, + { + "epoch": 0.85, + "learning_rate": 3.081236478970583e-05, + "loss": 1.3948, + "step": 2870 + }, + { + "epoch": 0.85, + "learning_rate": 3.069910178794082e-05, + "loss": 1.4116, + "step": 2880 + }, + { + "epoch": 0.86, + "learning_rate": 3.0585715261072206e-05, + "loss": 1.4029, + "step": 2890 + }, + { + "epoch": 0.86, + "learning_rate": 3.04722076666947e-05, + "loss": 1.399, + "step": 2900 + }, + { + "epoch": 0.86, + "learning_rate": 3.0358581465027125e-05, + "loss": 1.4061, + "step": 2910 + }, + { + "epoch": 0.87, + "learning_rate": 3.024483911885901e-05, + "loss": 1.4152, + "step": 2920 + }, + { + "epoch": 0.87, + "learning_rate": 3.013098309349729e-05, + "loss": 1.4257, + "step": 2930 + }, + { + "epoch": 0.87, + "learning_rate": 3.0017015856712814e-05, + "loss": 1.417, + "step": 2940 + }, + { + "epoch": 0.87, + "learning_rate": 2.9902939878686915e-05, + "loss": 1.3952, + "step": 2950 + }, + { + "epoch": 0.88, + "learning_rate": 2.978875763195779e-05, + "loss": 1.4252, + "step": 2960 + }, + { + "epoch": 0.88, + "learning_rate": 2.9674471591367005e-05, + "loss": 1.3982, + "step": 2970 + }, + { + "epoch": 0.88, + "learning_rate": 2.9560084234005765e-05, + "loss": 1.3948, + "step": 2980 + }, + { + "epoch": 0.89, + "learning_rate": 2.944559803916128e-05, + "loss": 1.4127, + "step": 2990 + }, + { + "epoch": 0.89, + "learning_rate": 2.9331015488263024e-05, + "loss": 1.4239, + "step": 3000 + }, + { + "epoch": 0.89, + "learning_rate": 2.9216339064828914e-05, + "loss": 1.3889, + "step": 3010 + }, + { + "epoch": 0.89, + "learning_rate": 2.910157125441152e-05, + "loss": 1.403, + "step": 3020 + }, + { + "epoch": 0.9, + "learning_rate": 2.898671454454418e-05, + "loss": 1.4106, + "step": 3030 + }, + { + "epoch": 0.9, + "learning_rate": 2.8871771424687078e-05, + "loss": 1.4123, + "step": 3040 + }, + { + "epoch": 0.9, + "learning_rate": 2.8756744386173284e-05, + "loss": 1.4137, + "step": 3050 + }, + { + "epoch": 0.91, + "learning_rate": 2.8641635922154774e-05, + "loss": 1.4009, + "step": 3060 + }, + { + "epoch": 0.91, + "learning_rate": 2.8526448527548372e-05, + "loss": 1.4159, + "step": 3070 + }, + { + "epoch": 0.91, + "learning_rate": 2.8411184698981684e-05, + "loss": 1.4071, + "step": 3080 + }, + { + "epoch": 0.92, + "learning_rate": 2.829584693473899e-05, + "loss": 1.41, + "step": 3090 + }, + { + "epoch": 0.92, + "learning_rate": 2.8180437734707064e-05, + "loss": 1.4038, + "step": 3100 + }, + { + "epoch": 0.92, + "learning_rate": 2.8064959600321043e-05, + "loss": 1.4069, + "step": 3110 + }, + { + "epoch": 0.92, + "learning_rate": 2.7949415034510163e-05, + "loss": 1.4096, + "step": 3120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7833806541643544e-05, + "loss": 1.3821, + "step": 3130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7718136627475865e-05, + "loss": 1.3886, + "step": 3140 + }, + { + "epoch": 0.93, + "learning_rate": 2.76024077990931e-05, + "loss": 1.3923, + "step": 3150 + }, + { + "epoch": 0.94, + "learning_rate": 2.748662256485816e-05, + "loss": 1.4072, + "step": 3160 + }, + { + "epoch": 0.94, + "learning_rate": 2.7370783434356512e-05, + "loss": 1.4126, + "step": 3170 + }, + { + "epoch": 0.94, + "learning_rate": 2.7254892918341802e-05, + "loss": 1.4238, + "step": 3180 + }, + { + "epoch": 0.95, + "learning_rate": 2.713895352868144e-05, + "loss": 1.4183, + "step": 3190 + }, + { + "epoch": 0.95, + "learning_rate": 2.702296777830212e-05, + "loss": 1.4056, + "step": 3200 + }, + { + "epoch": 0.95, + "learning_rate": 2.6906938181135423e-05, + "loss": 1.4096, + "step": 3210 + }, + { + "epoch": 0.95, + "learning_rate": 2.6790867252063247e-05, + "loss": 1.4018, + "step": 3220 + }, + { + "epoch": 0.96, + "learning_rate": 2.6674757506863357e-05, + "loss": 1.3922, + "step": 3230 + }, + { + "epoch": 0.96, + "learning_rate": 2.655861146215483e-05, + "loss": 1.4054, + "step": 3240 + }, + { + "epoch": 0.96, + "learning_rate": 2.6442431635343528e-05, + "loss": 1.3914, + "step": 3250 + }, + { + "epoch": 0.97, + "learning_rate": 2.6326220544567514e-05, + "loss": 1.3851, + "step": 3260 + }, + { + "epoch": 0.97, + "learning_rate": 2.620998070864248e-05, + "loss": 1.4102, + "step": 3270 + }, + { + "epoch": 0.97, + "learning_rate": 2.6093714647007156e-05, + "loss": 1.4069, + "step": 3280 + }, + { + "epoch": 0.97, + "learning_rate": 2.5977424879668705e-05, + "loss": 1.3919, + "step": 3290 + }, + { + "epoch": 0.98, + "learning_rate": 2.5861113927148096e-05, + "loss": 1.4073, + "step": 3300 + }, + { + "epoch": 0.98, + "learning_rate": 2.5744784310425467e-05, + "loss": 1.4025, + "step": 3310 + }, + { + "epoch": 0.98, + "learning_rate": 2.562843855088551e-05, + "loss": 1.3805, + "step": 3320 + }, + { + "epoch": 0.99, + "learning_rate": 2.5512079170262793e-05, + "loss": 1.4032, + "step": 3330 + }, + { + "epoch": 0.99, + "learning_rate": 2.5395708690587117e-05, + "loss": 1.4232, + "step": 3340 + }, + { + "epoch": 0.99, + "learning_rate": 2.527932963412885e-05, + "loss": 1.3897, + "step": 3350 + }, + { + "epoch": 1.0, + "learning_rate": 2.5162944523344256e-05, + "loss": 1.4008, + "step": 3360 + }, + { + "epoch": 1.0, + "learning_rate": 2.5046555880820826e-05, + "loss": 1.3936, + "step": 3370 + }, + { + "epoch": 1.0, + "learning_rate": 2.4930166229222597e-05, + "loss": 1.394, + "step": 3380 + }, + { + "epoch": 1.0, + "learning_rate": 2.481377809123547e-05, + "loss": 1.3903, + "step": 3390 + }, + { + "epoch": 1.01, + "learning_rate": 2.469739398951256e-05, + "loss": 1.3869, + "step": 3400 + }, + { + "epoch": 1.01, + "learning_rate": 2.458101644661947e-05, + "loss": 1.429, + "step": 3410 + }, + { + "epoch": 1.01, + "learning_rate": 2.4464647984979667e-05, + "loss": 1.3987, + "step": 3420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4348291126819783e-05, + "loss": 1.38, + "step": 3430 + }, + { + "epoch": 1.02, + "learning_rate": 2.4231948394114936e-05, + "loss": 1.3906, + "step": 3440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4115622308534096e-05, + "loss": 1.3931, + "step": 3450 + }, + { + "epoch": 1.03, + "learning_rate": 2.399931539138541e-05, + "loss": 1.4135, + "step": 3460 + }, + { + "epoch": 1.03, + "learning_rate": 2.388303016356156e-05, + "loss": 1.3952, + "step": 3470 + }, + { + "epoch": 1.03, + "learning_rate": 2.3766769145485125e-05, + "loss": 1.3972, + "step": 3480 + }, + { + "epoch": 1.03, + "learning_rate": 2.3650534857053943e-05, + "loss": 1.3937, + "step": 3490 + }, + { + "epoch": 1.04, + "learning_rate": 2.3534329817586513e-05, + "loss": 1.3936, + "step": 3500 + }, + { + "epoch": 1.04, + "learning_rate": 2.3418156545767365e-05, + "loss": 1.397, + "step": 3510 + }, + { + "epoch": 1.04, + "learning_rate": 2.3302017559592494e-05, + "loss": 1.3849, + "step": 3520 + }, + { + "epoch": 1.05, + "learning_rate": 2.318591537631476e-05, + "loss": 1.4118, + "step": 3530 + }, + { + "epoch": 1.05, + "learning_rate": 2.3069852512389335e-05, + "loss": 1.414, + "step": 3540 + }, + { + "epoch": 1.05, + "learning_rate": 2.2953831483419184e-05, + "loss": 1.4088, + "step": 3550 + }, + { + "epoch": 1.05, + "learning_rate": 2.2837854804100504e-05, + "loss": 1.3773, + "step": 3560 + }, + { + "epoch": 1.06, + "learning_rate": 2.272192498816825e-05, + "loss": 1.3977, + "step": 3570 + }, + { + "epoch": 1.06, + "learning_rate": 2.260604454834162e-05, + "loss": 1.3591, + "step": 3580 + }, + { + "epoch": 1.06, + "learning_rate": 2.2490215996269617e-05, + "loss": 1.4023, + "step": 3590 + }, + { + "epoch": 1.07, + "learning_rate": 2.237444184247661e-05, + "loss": 1.3873, + "step": 3600 + }, + { + "epoch": 1.07, + "learning_rate": 2.2258724596307915e-05, + "loss": 1.3826, + "step": 3610 + }, + { + "epoch": 1.07, + "learning_rate": 2.214306676587539e-05, + "loss": 1.3732, + "step": 3620 + }, + { + "epoch": 1.08, + "learning_rate": 2.2027470858003098e-05, + "loss": 1.3988, + "step": 3630 + }, + { + "epoch": 1.08, + "learning_rate": 2.1911939378172956e-05, + "loss": 1.4036, + "step": 3640 + }, + { + "epoch": 1.08, + "learning_rate": 2.1796474830470447e-05, + "loss": 1.4236, + "step": 3650 + }, + { + "epoch": 1.08, + "learning_rate": 2.1681079717530328e-05, + "loss": 1.4032, + "step": 3660 + }, + { + "epoch": 1.09, + "learning_rate": 2.156575654048239e-05, + "loss": 1.39, + "step": 3670 + }, + { + "epoch": 1.09, + "learning_rate": 2.145050779889725e-05, + "loss": 1.3757, + "step": 3680 + }, + { + "epoch": 1.09, + "learning_rate": 2.1335335990732186e-05, + "loss": 1.3934, + "step": 3690 + }, + { + "epoch": 1.1, + "learning_rate": 2.1220243612276964e-05, + "loss": 1.3979, + "step": 3700 + }, + { + "epoch": 1.1, + "learning_rate": 2.110523315809978e-05, + "loss": 1.4181, + "step": 3710 + }, + { + "epoch": 1.1, + "learning_rate": 2.0990307120993134e-05, + "loss": 1.406, + "step": 3720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0875467991919854e-05, + "loss": 1.4036, + "step": 3730 + }, + { + "epoch": 1.11, + "learning_rate": 2.076071825995906e-05, + "loss": 1.4095, + "step": 3740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0646060412252246e-05, + "loss": 1.4048, + "step": 3750 + }, + { + "epoch": 1.11, + "learning_rate": 2.0531496933949363e-05, + "loss": 1.3874, + "step": 3760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417030308154953e-05, + "loss": 1.3793, + "step": 3770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0302663015874322e-05, + "loss": 1.4152, + "step": 3780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0188397535959785e-05, + "loss": 1.3738, + "step": 3790 + }, + { + "epoch": 1.13, + "learning_rate": 2.007423634505692e-05, + "loss": 1.4033, + "step": 3800 + }, + { + "epoch": 1.13, + "learning_rate": 1.9960181917550897e-05, + "loss": 1.3753, + "step": 3810 + }, + { + "epoch": 1.13, + "learning_rate": 1.9846236725512835e-05, + "loss": 1.3791, + "step": 3820 + }, + { + "epoch": 1.13, + "learning_rate": 1.973240323864624e-05, + "loss": 1.3837, + "step": 3830 + }, + { + "epoch": 1.14, + "learning_rate": 1.9618683924233467e-05, + "loss": 1.3945, + "step": 3840 + }, + { + "epoch": 1.14, + "learning_rate": 1.9505081247082237e-05, + "loss": 1.3804, + "step": 3850 + }, + { + "epoch": 1.14, + "learning_rate": 1.9391597669472213e-05, + "loss": 1.3964, + "step": 3860 + }, + { + "epoch": 1.15, + "learning_rate": 1.927823565110165e-05, + "loss": 1.3983, + "step": 3870 + }, + { + "epoch": 1.15, + "learning_rate": 1.9164997649034058e-05, + "loss": 1.4169, + "step": 3880 + }, + { + "epoch": 1.15, + "learning_rate": 1.9051886117644963e-05, + "loss": 1.4101, + "step": 3890 + }, + { + "epoch": 1.16, + "learning_rate": 1.89389035085687e-05, + "loss": 1.3823, + "step": 3900 + }, + { + "epoch": 1.16, + "learning_rate": 1.8826052270645276e-05, + "loss": 1.3827, + "step": 3910 + }, + { + "epoch": 1.16, + "learning_rate": 1.8713334849867315e-05, + "loss": 1.4035, + "step": 3920 + }, + { + "epoch": 1.16, + "learning_rate": 1.8600753689327e-05, + "loss": 1.4081, + "step": 3930 + }, + { + "epoch": 1.17, + "learning_rate": 1.8488311229163152e-05, + "loss": 1.3919, + "step": 3940 + }, + { + "epoch": 1.17, + "learning_rate": 1.8376009906508338e-05, + "loss": 1.3854, + "step": 3950 + }, + { + "epoch": 1.17, + "learning_rate": 1.826385215543603e-05, + "loss": 1.3924, + "step": 3960 + }, + { + "epoch": 1.18, + "learning_rate": 1.8151840406907873e-05, + "loss": 1.3851, + "step": 3970 + }, + { + "epoch": 1.18, + "learning_rate": 1.8039977088720972e-05, + "loss": 1.3707, + "step": 3980 + }, + { + "epoch": 1.18, + "learning_rate": 1.7928264625455282e-05, + "loss": 1.3998, + "step": 3990 + }, + { + "epoch": 1.19, + "learning_rate": 1.7816705438421064e-05, + "loss": 1.3931, + "step": 4000 + } + ], + "max_steps": 6748, + "num_train_epochs": 2, + "total_flos": 6.922102358209987e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..406ab5b628f223bfcd63d70185fb1bc0973e19c4 --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77681af64e1f04ae2b28b063de632629c209cd2338ce2449c3e014f309b6088a +size 3298 diff --git a/checkpoint-5000/README.md b/checkpoint-5000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-5000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-5000/adapter_config.json b/checkpoint-5000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a092508f49b7debf23eb22091bdac4ac1daa62a9 --- /dev/null +++ b/checkpoint-5000/adapter_config.json @@ -0,0 +1,20 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "../Baichuan-13B-Chat", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "W_pack" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5000/adapter_model.bin b/checkpoint-5000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..10862d8b666d9c8ab03494a5028160ae24e0528d --- /dev/null +++ b/checkpoint-5000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7b545ca5fea07033f60a40763ee2ed17ddf67b2838ff578729827fa47fdb928a +size 26241825 diff --git a/checkpoint-5000/finetuning_args.json b/checkpoint-5000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..fafc24bcd05e0bda0201b5a7198b067dab53f435 --- /dev/null +++ b/checkpoint-5000/finetuning_args.json @@ -0,0 +1,12 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "W_pack" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6f7df645c448c03742ba48f6cab9111580c17079 --- /dev/null +++ b/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:47da8eff3482690bdb2c1f9fce6693323ba4f11216919c8899fc4a6f66ce9f53 +size 52496005 diff --git a/checkpoint-5000/rng_state_0.pth b/checkpoint-5000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..e60562394c9ab758ddfcbe784173d3035e07e4ec --- /dev/null +++ b/checkpoint-5000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e92652415bf156ec5ddea98dc1d44b73c770f16deb820fad3cdaf3c52607ca5d +size 18679 diff --git a/checkpoint-5000/rng_state_1.pth b/checkpoint-5000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..08d8900a9395e24d3c5066577d741aedccc3ad2f --- /dev/null +++ b/checkpoint-5000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e34bd2b31b79be5e2613bbede4aaf28661b5f311bcbf16bb52133edd3fa89eae +size 18679 diff --git a/checkpoint-5000/rng_state_2.pth b/checkpoint-5000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..010c58643c2e8d52a089ebec31dd850e75e5960b --- /dev/null +++ b/checkpoint-5000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6246470ae5cd726ef67c334637b384ff19d6531b599acf0f3c134f2b4ca3d87f +size 18679 diff --git a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f610ca9940dc9297cb14d28d8e9990866517df63 --- /dev/null +++ b/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bca5f643af4b86ef0e41540d72c1794b65ed9baeb3ef1bf91257253e75ff6fb5 +size 627 diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7ca4426726e546da3120b14868c62566198953c8 --- /dev/null +++ b/checkpoint-5000/trainer_state.json @@ -0,0 +1,3016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4815912289799245, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999729068921297e-05, + "loss": 1.8898, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998916281557476e-05, + "loss": 1.7273, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999756165552527e-05, + "loss": 1.6799, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566522018553e-05, + "loss": 1.6431, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322701664249e-05, + "loss": 1.6153, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990247097742984e-05, + "loss": 1.5933, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.9986725528075205e-05, + "loss": 1.5913, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.998266238396737e-05, + "loss": 1.5434, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 4.997805775348605e-05, + "loss": 1.5304, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 4.997291173643424e-05, + "loss": 1.5531, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 4.996722444434921e-05, + "loss": 1.5446, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 4.99609960005001e-05, + "loss": 1.5352, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 4.995422653988524e-05, + "loss": 1.5303, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 4.994691620922919e-05, + "loss": 1.5449, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 4.993906516697964e-05, + "loss": 1.5114, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9930673583303865e-05, + "loss": 1.5043, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 4.992174164008515e-05, + "loss": 1.5476, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 4.991226953091877e-05, + "loss": 1.5107, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 4.9902257461107824e-05, + "loss": 1.5104, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 4.9891705647658795e-05, + "loss": 1.5298, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 4.988061431927681e-05, + "loss": 1.4907, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 4.986898371636071e-05, + "loss": 1.5127, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 4.985681409099784e-05, + "loss": 1.5037, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 4.984410570695858e-05, + "loss": 1.5029, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 4.983085883969063e-05, + "loss": 1.4725, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 4.981707377631303e-05, + "loss": 1.5148, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 4.9802750815609936e-05, + "loss": 1.4993, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 4.978789026802419e-05, + "loss": 1.5006, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 4.9772492455650494e-05, + "loss": 1.4885, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 4.975655771222855e-05, + "loss": 1.4898, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 4.9740086383135706e-05, + "loss": 1.4906, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 4.97230788253796e-05, + "loss": 1.4796, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 4.970553540759028e-05, + "loss": 1.4861, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 4.968745651001231e-05, + "loss": 1.4827, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9668842524496526e-05, + "loss": 1.4884, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 4.964969385449149e-05, + "loss": 1.4873, + "step": 360 + }, + { + "epoch": 0.11, + "learning_rate": 4.96300109150348e-05, + "loss": 1.4848, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 4.960979413274404e-05, + "loss": 1.4881, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 4.9589043945807594e-05, + "loss": 1.4618, + "step": 390 + }, + { + "epoch": 0.12, + "learning_rate": 4.9567760803975105e-05, + "loss": 1.4858, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 4.954594516854773e-05, + "loss": 1.4777, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 4.952359751236817e-05, + "loss": 1.4828, + "step": 420 + }, + { + "epoch": 0.13, + "learning_rate": 4.950071831981038e-05, + "loss": 1.4571, + "step": 430 + }, + { + "epoch": 0.13, + "learning_rate": 4.9477308086769117e-05, + "loss": 1.4724, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 4.945336732064915e-05, + "loss": 1.4771, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 4.9428896540354294e-05, + "loss": 1.4604, + "step": 460 + }, + { + "epoch": 0.14, + "learning_rate": 4.940389627627613e-05, + "loss": 1.4815, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 4.937836707028255e-05, + "loss": 1.4859, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 4.935230947570597e-05, + "loss": 1.4715, + "step": 490 + }, + { + "epoch": 0.15, + "learning_rate": 4.932572405733137e-05, + "loss": 1.4759, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 4.929861139138404e-05, + "loss": 1.4678, + "step": 510 + }, + { + "epoch": 0.15, + "learning_rate": 4.9270972065517083e-05, + "loss": 1.4754, + "step": 520 + }, + { + "epoch": 0.16, + "learning_rate": 4.924280667879869e-05, + "loss": 1.462, + "step": 530 + }, + { + "epoch": 0.16, + "learning_rate": 4.921411584169915e-05, + "loss": 1.4704, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 4.918490017607761e-05, + "loss": 1.4661, + "step": 550 + }, + { + "epoch": 0.17, + "learning_rate": 4.915516031516863e-05, + "loss": 1.471, + "step": 560 + }, + { + "epoch": 0.17, + "learning_rate": 4.912489690356841e-05, + "loss": 1.451, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 4.909411059722084e-05, + "loss": 1.4411, + "step": 580 + }, + { + "epoch": 0.17, + "learning_rate": 4.9062802063403316e-05, + "loss": 1.456, + "step": 590 + }, + { + "epoch": 0.18, + "learning_rate": 4.90309719807122e-05, + "loss": 1.4678, + "step": 600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8998621039048205e-05, + "loss": 1.479, + "step": 610 + }, + { + "epoch": 0.18, + "learning_rate": 4.896574993960136e-05, + "loss": 1.4471, + "step": 620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893235939483587e-05, + "loss": 1.453, + "step": 630 + }, + { + "epoch": 0.19, + "learning_rate": 4.8898450128474626e-05, + "loss": 1.4696, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 4.886402287548357e-05, + "loss": 1.4526, + "step": 650 + }, + { + "epoch": 0.2, + "learning_rate": 4.8829078382055725e-05, + "loss": 1.4429, + "step": 660 + }, + { + "epoch": 0.2, + "learning_rate": 4.8793617405595025e-05, + "loss": 1.4491, + "step": 670 + }, + { + "epoch": 0.2, + "learning_rate": 4.8757640714699924e-05, + "loss": 1.4411, + "step": 680 + }, + { + "epoch": 0.2, + "learning_rate": 4.872114908914671e-05, + "loss": 1.4543, + "step": 690 + }, + { + "epoch": 0.21, + "learning_rate": 4.8684143319872636e-05, + "loss": 1.4556, + "step": 700 + }, + { + "epoch": 0.21, + "learning_rate": 4.864662420895873e-05, + "loss": 1.4506, + "step": 710 + }, + { + "epoch": 0.21, + "learning_rate": 4.860859256961244e-05, + "loss": 1.4671, + "step": 720 + }, + { + "epoch": 0.22, + "learning_rate": 4.857004922615002e-05, + "loss": 1.4469, + "step": 730 + }, + { + "epoch": 0.22, + "learning_rate": 4.8530995013978645e-05, + "loss": 1.4554, + "step": 740 + }, + { + "epoch": 0.22, + "learning_rate": 4.84914307795783e-05, + "loss": 1.4671, + "step": 750 + }, + { + "epoch": 0.23, + "learning_rate": 4.845135738048343e-05, + "loss": 1.445, + "step": 760 + }, + { + "epoch": 0.23, + "learning_rate": 4.841077568526439e-05, + "loss": 1.4469, + "step": 770 + }, + { + "epoch": 0.23, + "learning_rate": 4.836968657350857e-05, + "loss": 1.4677, + "step": 780 + }, + { + "epoch": 0.23, + "learning_rate": 4.832809093580135e-05, + "loss": 1.4653, + "step": 790 + }, + { + "epoch": 0.24, + "learning_rate": 4.8285989673706826e-05, + "loss": 1.4342, + "step": 800 + }, + { + "epoch": 0.24, + "learning_rate": 4.824338369974822e-05, + "loss": 1.458, + "step": 810 + }, + { + "epoch": 0.24, + "learning_rate": 4.8200273937388126e-05, + "loss": 1.4541, + "step": 820 + }, + { + "epoch": 0.25, + "learning_rate": 4.81566613210085e-05, + "loss": 1.4324, + "step": 830 + }, + { + "epoch": 0.25, + "learning_rate": 4.81125467958904e-05, + "loss": 1.4405, + "step": 840 + }, + { + "epoch": 0.25, + "learning_rate": 4.80679313181935e-05, + "loss": 1.4408, + "step": 850 + }, + { + "epoch": 0.25, + "learning_rate": 4.8022815854935356e-05, + "loss": 1.4395, + "step": 860 + }, + { + "epoch": 0.26, + "learning_rate": 4.797720138397045e-05, + "loss": 1.4359, + "step": 870 + }, + { + "epoch": 0.26, + "learning_rate": 4.793108889396902e-05, + "loss": 1.442, + "step": 880 + }, + { + "epoch": 0.26, + "learning_rate": 4.7884479384395594e-05, + "loss": 1.4566, + "step": 890 + }, + { + "epoch": 0.27, + "learning_rate": 4.7837373865487345e-05, + "loss": 1.4257, + "step": 900 + }, + { + "epoch": 0.27, + "learning_rate": 4.77897733582322e-05, + "loss": 1.4755, + "step": 910 + }, + { + "epoch": 0.27, + "learning_rate": 4.774167889434671e-05, + "loss": 1.4476, + "step": 920 + }, + { + "epoch": 0.28, + "learning_rate": 4.769309151625366e-05, + "loss": 1.4531, + "step": 930 + }, + { + "epoch": 0.28, + "learning_rate": 4.7644012277059516e-05, + "loss": 1.447, + "step": 940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7594442240531574e-05, + "loss": 1.4201, + "step": 950 + }, + { + "epoch": 0.28, + "learning_rate": 4.754438248107491e-05, + "loss": 1.4323, + "step": 960 + }, + { + "epoch": 0.29, + "learning_rate": 4.7493834083709104e-05, + "loss": 1.4432, + "step": 970 + }, + { + "epoch": 0.29, + "learning_rate": 4.7442798144044695e-05, + "loss": 1.4339, + "step": 980 + }, + { + "epoch": 0.29, + "learning_rate": 4.739127576825945e-05, + "loss": 1.4477, + "step": 990 + }, + { + "epoch": 0.3, + "learning_rate": 4.733926807307441e-05, + "loss": 1.4242, + "step": 1000 + }, + { + "epoch": 0.3, + "learning_rate": 4.728677618572965e-05, + "loss": 1.4341, + "step": 1010 + }, + { + "epoch": 0.3, + "learning_rate": 4.723380124395985e-05, + "loss": 1.4526, + "step": 1020 + }, + { + "epoch": 0.31, + "learning_rate": 4.7180344395969675e-05, + "loss": 1.4402, + "step": 1030 + }, + { + "epoch": 0.31, + "learning_rate": 4.712640680040884e-05, + "loss": 1.4257, + "step": 1040 + }, + { + "epoch": 0.31, + "learning_rate": 4.707198962634701e-05, + "loss": 1.4232, + "step": 1050 + }, + { + "epoch": 0.31, + "learning_rate": 4.70170940532485e-05, + "loss": 1.4485, + "step": 1060 + }, + { + "epoch": 0.32, + "learning_rate": 4.6961721270946635e-05, + "loss": 1.456, + "step": 1070 + }, + { + "epoch": 0.32, + "learning_rate": 4.690587247961804e-05, + "loss": 1.4555, + "step": 1080 + }, + { + "epoch": 0.32, + "learning_rate": 4.684954888975657e-05, + "loss": 1.4376, + "step": 1090 + }, + { + "epoch": 0.33, + "learning_rate": 4.6792751722147104e-05, + "loss": 1.4353, + "step": 1100 + }, + { + "epoch": 0.33, + "learning_rate": 4.6735482207839074e-05, + "loss": 1.4226, + "step": 1110 + }, + { + "epoch": 0.33, + "learning_rate": 4.6677741588119784e-05, + "loss": 1.4315, + "step": 1120 + }, + { + "epoch": 0.33, + "learning_rate": 4.66195311144875e-05, + "loss": 1.4303, + "step": 1130 + }, + { + "epoch": 0.34, + "learning_rate": 4.6560852048624345e-05, + "loss": 1.4288, + "step": 1140 + }, + { + "epoch": 0.34, + "learning_rate": 4.650170566236892e-05, + "loss": 1.4539, + "step": 1150 + }, + { + "epoch": 0.34, + "learning_rate": 4.6442093237688756e-05, + "loss": 1.4527, + "step": 1160 + }, + { + "epoch": 0.35, + "learning_rate": 4.6382016066652556e-05, + "loss": 1.4406, + "step": 1170 + }, + { + "epoch": 0.35, + "learning_rate": 4.632147545140212e-05, + "loss": 1.4233, + "step": 1180 + }, + { + "epoch": 0.35, + "learning_rate": 4.626047270412419e-05, + "loss": 1.426, + "step": 1190 + }, + { + "epoch": 0.36, + "learning_rate": 4.619900914702198e-05, + "loss": 1.4577, + "step": 1200 + }, + { + "epoch": 0.36, + "learning_rate": 4.613708611228652e-05, + "loss": 1.4313, + "step": 1210 + }, + { + "epoch": 0.36, + "learning_rate": 4.607470494206776e-05, + "loss": 1.4129, + "step": 1220 + }, + { + "epoch": 0.36, + "learning_rate": 4.601186698844554e-05, + "loss": 1.4368, + "step": 1230 + }, + { + "epoch": 0.37, + "learning_rate": 4.594857361340021e-05, + "loss": 1.4342, + "step": 1240 + }, + { + "epoch": 0.37, + "learning_rate": 4.588482618878316e-05, + "loss": 1.4438, + "step": 1250 + }, + { + "epoch": 0.37, + "learning_rate": 4.582062609628709e-05, + "loss": 1.4263, + "step": 1260 + }, + { + "epoch": 0.38, + "learning_rate": 4.575597472741601e-05, + "loss": 1.4379, + "step": 1270 + }, + { + "epoch": 0.38, + "learning_rate": 4.569087348345512e-05, + "loss": 1.4221, + "step": 1280 + }, + { + "epoch": 0.38, + "learning_rate": 4.562532377544046e-05, + "loss": 1.4414, + "step": 1290 + }, + { + "epoch": 0.39, + "learning_rate": 4.5559327024128265e-05, + "loss": 1.4395, + "step": 1300 + }, + { + "epoch": 0.39, + "learning_rate": 4.549288465996421e-05, + "loss": 1.4278, + "step": 1310 + }, + { + "epoch": 0.39, + "learning_rate": 4.542599812305243e-05, + "loss": 1.4344, + "step": 1320 + }, + { + "epoch": 0.39, + "learning_rate": 4.535866886312423e-05, + "loss": 1.4352, + "step": 1330 + }, + { + "epoch": 0.4, + "learning_rate": 4.529089833950675e-05, + "loss": 1.4133, + "step": 1340 + }, + { + "epoch": 0.4, + "learning_rate": 4.5222688021091266e-05, + "loss": 1.4506, + "step": 1350 + }, + { + "epoch": 0.4, + "learning_rate": 4.5154039386301385e-05, + "loss": 1.4295, + "step": 1360 + }, + { + "epoch": 0.41, + "learning_rate": 4.5084953923061016e-05, + "loss": 1.4389, + "step": 1370 + }, + { + "epoch": 0.41, + "learning_rate": 4.5015433128762065e-05, + "loss": 1.4247, + "step": 1380 + }, + { + "epoch": 0.41, + "learning_rate": 4.494547851023205e-05, + "loss": 1.4347, + "step": 1390 + }, + { + "epoch": 0.41, + "learning_rate": 4.487509158370139e-05, + "loss": 1.4133, + "step": 1400 + }, + { + "epoch": 0.42, + "learning_rate": 4.480427387477056e-05, + "loss": 1.4296, + "step": 1410 + }, + { + "epoch": 0.42, + "learning_rate": 4.473302691837702e-05, + "loss": 1.4353, + "step": 1420 + }, + { + "epoch": 0.42, + "learning_rate": 4.466135225876194e-05, + "loss": 1.4377, + "step": 1430 + }, + { + "epoch": 0.43, + "learning_rate": 4.458925144943676e-05, + "loss": 1.4168, + "step": 1440 + }, + { + "epoch": 0.43, + "learning_rate": 4.451672605314948e-05, + "loss": 1.4334, + "step": 1450 + }, + { + "epoch": 0.43, + "learning_rate": 4.444377764185082e-05, + "loss": 1.44, + "step": 1460 + }, + { + "epoch": 0.44, + "learning_rate": 4.43704077966601e-05, + "loss": 1.4375, + "step": 1470 + }, + { + "epoch": 0.44, + "learning_rate": 4.4296618107831036e-05, + "loss": 1.447, + "step": 1480 + }, + { + "epoch": 0.44, + "learning_rate": 4.422241017471722e-05, + "loss": 1.4151, + "step": 1490 + }, + { + "epoch": 0.44, + "learning_rate": 4.414778560573749e-05, + "loss": 1.4388, + "step": 1500 + }, + { + "epoch": 0.45, + "learning_rate": 4.4072746018341036e-05, + "loss": 1.4228, + "step": 1510 + }, + { + "epoch": 0.45, + "learning_rate": 4.399729303897238e-05, + "loss": 1.4104, + "step": 1520 + }, + { + "epoch": 0.45, + "learning_rate": 4.392142830303608e-05, + "loss": 1.4441, + "step": 1530 + }, + { + "epoch": 0.46, + "learning_rate": 4.384515345486131e-05, + "loss": 1.4282, + "step": 1540 + }, + { + "epoch": 0.46, + "learning_rate": 4.376847014766623e-05, + "loss": 1.4271, + "step": 1550 + }, + { + "epoch": 0.46, + "learning_rate": 4.369138004352212e-05, + "loss": 1.4223, + "step": 1560 + }, + { + "epoch": 0.47, + "learning_rate": 4.3613884813317406e-05, + "loss": 1.425, + "step": 1570 + }, + { + "epoch": 0.47, + "learning_rate": 4.3535986136721377e-05, + "loss": 1.4392, + "step": 1580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3457685702147834e-05, + "loss": 1.4097, + "step": 1590 + }, + { + "epoch": 0.47, + "learning_rate": 4.3378985206718484e-05, + "loss": 1.4405, + "step": 1600 + }, + { + "epoch": 0.48, + "learning_rate": 4.329988635622611e-05, + "loss": 1.4311, + "step": 1610 + }, + { + "epoch": 0.48, + "learning_rate": 4.322039086509769e-05, + "loss": 1.4358, + "step": 1620 + }, + { + "epoch": 0.48, + "learning_rate": 4.3140500456357145e-05, + "loss": 1.4114, + "step": 1630 + }, + { + "epoch": 0.49, + "learning_rate": 4.306021686158805e-05, + "loss": 1.4165, + "step": 1640 + }, + { + "epoch": 0.49, + "learning_rate": 4.297954182089609e-05, + "loss": 1.4309, + "step": 1650 + }, + { + "epoch": 0.49, + "learning_rate": 4.289847708287129e-05, + "loss": 1.4215, + "step": 1660 + }, + { + "epoch": 0.49, + "learning_rate": 4.2817024404550246e-05, + "loss": 1.4124, + "step": 1670 + }, + { + "epoch": 0.5, + "learning_rate": 4.2735185551377895e-05, + "loss": 1.4001, + "step": 1680 + }, + { + "epoch": 0.5, + "learning_rate": 4.265296229716935e-05, + "loss": 1.4302, + "step": 1690 + }, + { + "epoch": 0.5, + "learning_rate": 4.25703564240714e-05, + "loss": 1.4211, + "step": 1700 + }, + { + "epoch": 0.51, + "learning_rate": 4.2487369722523906e-05, + "loss": 1.4423, + "step": 1710 + }, + { + "epoch": 0.51, + "learning_rate": 4.240400399122101e-05, + "loss": 1.4299, + "step": 1720 + }, + { + "epoch": 0.51, + "learning_rate": 4.232026103707209e-05, + "loss": 1.4214, + "step": 1730 + }, + { + "epoch": 0.52, + "learning_rate": 4.223614267516268e-05, + "loss": 1.4348, + "step": 1740 + }, + { + "epoch": 0.52, + "learning_rate": 4.215165072871505e-05, + "loss": 1.4315, + "step": 1750 + }, + { + "epoch": 0.52, + "learning_rate": 4.206678702904874e-05, + "loss": 1.4098, + "step": 1760 + }, + { + "epoch": 0.52, + "learning_rate": 4.198155341554084e-05, + "loss": 1.4242, + "step": 1770 + }, + { + "epoch": 0.53, + "learning_rate": 4.1895951735586145e-05, + "loss": 1.4272, + "step": 1780 + }, + { + "epoch": 0.53, + "learning_rate": 4.1809983844557085e-05, + "loss": 1.4452, + "step": 1790 + }, + { + "epoch": 0.53, + "learning_rate": 4.172365160576355e-05, + "loss": 1.431, + "step": 1800 + }, + { + "epoch": 0.54, + "learning_rate": 4.163695689041245e-05, + "loss": 1.4389, + "step": 1810 + }, + { + "epoch": 0.54, + "learning_rate": 4.154990157756722e-05, + "loss": 1.413, + "step": 1820 + }, + { + "epoch": 0.54, + "learning_rate": 4.1462487554107036e-05, + "loss": 1.3893, + "step": 1830 + }, + { + "epoch": 0.55, + "learning_rate": 4.137471671468596e-05, + "loss": 1.4052, + "step": 1840 + }, + { + "epoch": 0.55, + "learning_rate": 4.128659096169183e-05, + "loss": 1.4173, + "step": 1850 + }, + { + "epoch": 0.55, + "learning_rate": 4.1198112205205096e-05, + "loss": 1.4012, + "step": 1860 + }, + { + "epoch": 0.55, + "learning_rate": 4.110928236295734e-05, + "loss": 1.4119, + "step": 1870 + }, + { + "epoch": 0.56, + "learning_rate": 4.102010336028975e-05, + "loss": 1.4111, + "step": 1880 + }, + { + "epoch": 0.56, + "learning_rate": 4.0930577130111424e-05, + "loss": 1.4156, + "step": 1890 + }, + { + "epoch": 0.56, + "learning_rate": 4.084070561285739e-05, + "loss": 1.4419, + "step": 1900 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750490756446624e-05, + "loss": 1.4121, + "step": 1910 + }, + { + "epoch": 0.57, + "learning_rate": 4.0659934516239795e-05, + "loss": 1.4204, + "step": 1920 + }, + { + "epoch": 0.57, + "learning_rate": 4.056903885499689e-05, + "loss": 1.4032, + "step": 1930 + }, + { + "epoch": 0.57, + "learning_rate": 4.047780574283466e-05, + "loss": 1.4207, + "step": 1940 + }, + { + "epoch": 0.58, + "learning_rate": 4.038623715718397e-05, + "loss": 1.4095, + "step": 1950 + }, + { + "epoch": 0.58, + "learning_rate": 4.029433508274686e-05, + "loss": 1.4228, + "step": 1960 + }, + { + "epoch": 0.58, + "learning_rate": 4.0202101511453586e-05, + "loss": 1.4141, + "step": 1970 + }, + { + "epoch": 0.59, + "learning_rate": 4.010953844241943e-05, + "loss": 1.4323, + "step": 1980 + }, + { + "epoch": 0.59, + "learning_rate": 4.001664788190135e-05, + "loss": 1.4087, + "step": 1990 + }, + { + "epoch": 0.59, + "learning_rate": 3.992343184325453e-05, + "loss": 1.4186, + "step": 2000 + }, + { + "epoch": 0.6, + "learning_rate": 3.982989234688873e-05, + "loss": 1.4264, + "step": 2010 + }, + { + "epoch": 0.6, + "learning_rate": 3.973603142022448e-05, + "loss": 1.4417, + "step": 2020 + }, + { + "epoch": 0.6, + "learning_rate": 3.964185109764915e-05, + "loss": 1.4075, + "step": 2030 + }, + { + "epoch": 0.6, + "learning_rate": 3.954735342047285e-05, + "loss": 1.4143, + "step": 2040 + }, + { + "epoch": 0.61, + "learning_rate": 3.945254043688419e-05, + "loss": 1.4176, + "step": 2050 + }, + { + "epoch": 0.61, + "learning_rate": 3.935741420190587e-05, + "loss": 1.4214, + "step": 2060 + }, + { + "epoch": 0.61, + "learning_rate": 3.926197677735018e-05, + "loss": 1.4256, + "step": 2070 + }, + { + "epoch": 0.62, + "learning_rate": 3.9166230231774276e-05, + "loss": 1.4075, + "step": 2080 + }, + { + "epoch": 0.62, + "learning_rate": 3.9070176640435335e-05, + "loss": 1.3887, + "step": 2090 + }, + { + "epoch": 0.62, + "learning_rate": 3.897381808524562e-05, + "loss": 1.4225, + "step": 2100 + }, + { + "epoch": 0.63, + "learning_rate": 3.887715665472729e-05, + "loss": 1.4114, + "step": 2110 + }, + { + "epoch": 0.63, + "learning_rate": 3.8780194443967226e-05, + "loss": 1.4316, + "step": 2120 + }, + { + "epoch": 0.63, + "learning_rate": 3.8682933554571524e-05, + "loss": 1.4168, + "step": 2130 + }, + { + "epoch": 0.63, + "learning_rate": 3.858537609461999e-05, + "loss": 1.4237, + "step": 2140 + }, + { + "epoch": 0.64, + "learning_rate": 3.8487524178620464e-05, + "loss": 1.4373, + "step": 2150 + }, + { + "epoch": 0.64, + "learning_rate": 3.838937992746295e-05, + "loss": 1.4089, + "step": 2160 + }, + { + "epoch": 0.64, + "learning_rate": 3.8290945468373684e-05, + "loss": 1.4319, + "step": 2170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8192222934869e-05, + "loss": 1.4035, + "step": 2180 + }, + { + "epoch": 0.65, + "learning_rate": 3.809321446670909e-05, + "loss": 1.4161, + "step": 2190 + }, + { + "epoch": 0.65, + "learning_rate": 3.799392220985164e-05, + "loss": 1.4136, + "step": 2200 + }, + { + "epoch": 0.65, + "learning_rate": 3.789434831640533e-05, + "loss": 1.4188, + "step": 2210 + }, + { + "epoch": 0.66, + "learning_rate": 3.779449494458312e-05, + "loss": 1.4203, + "step": 2220 + }, + { + "epoch": 0.66, + "learning_rate": 3.769436425865557e-05, + "loss": 1.4263, + "step": 2230 + }, + { + "epoch": 0.66, + "learning_rate": 3.759395842890384e-05, + "loss": 1.4295, + "step": 2240 + }, + { + "epoch": 0.67, + "learning_rate": 3.749327963157274e-05, + "loss": 1.4144, + "step": 2250 + }, + { + "epoch": 0.67, + "learning_rate": 3.739233004882346e-05, + "loss": 1.4162, + "step": 2260 + }, + { + "epoch": 0.67, + "learning_rate": 3.729111186868635e-05, + "loss": 1.4099, + "step": 2270 + }, + { + "epoch": 0.68, + "learning_rate": 3.718962728501348e-05, + "loss": 1.3878, + "step": 2280 + }, + { + "epoch": 0.68, + "learning_rate": 3.708787849743106e-05, + "loss": 1.4399, + "step": 2290 + }, + { + "epoch": 0.68, + "learning_rate": 3.69858677112918e-05, + "loss": 1.4249, + "step": 2300 + }, + { + "epoch": 0.68, + "learning_rate": 3.688359713762707e-05, + "loss": 1.3925, + "step": 2310 + }, + { + "epoch": 0.69, + "learning_rate": 3.6781068993099034e-05, + "loss": 1.4036, + "step": 2320 + }, + { + "epoch": 0.69, + "learning_rate": 3.667828549995255e-05, + "loss": 1.3986, + "step": 2330 + }, + { + "epoch": 0.69, + "learning_rate": 3.657524888596703e-05, + "loss": 1.4298, + "step": 2340 + }, + { + "epoch": 0.7, + "learning_rate": 3.6471961384408155e-05, + "loss": 1.4016, + "step": 2350 + }, + { + "epoch": 0.7, + "learning_rate": 3.636842523397945e-05, + "loss": 1.3992, + "step": 2360 + }, + { + "epoch": 0.7, + "learning_rate": 3.626464267877381e-05, + "loss": 1.4441, + "step": 2370 + }, + { + "epoch": 0.71, + "learning_rate": 3.616061596822478e-05, + "loss": 1.3967, + "step": 2380 + }, + { + "epoch": 0.71, + "learning_rate": 3.6056347357057893e-05, + "loss": 1.4252, + "step": 2390 + }, + { + "epoch": 0.71, + "learning_rate": 3.595183910524173e-05, + "loss": 1.4209, + "step": 2400 + }, + { + "epoch": 0.71, + "learning_rate": 3.5847093477938956e-05, + "loss": 1.4133, + "step": 2410 + }, + { + "epoch": 0.72, + "learning_rate": 3.5742112745457235e-05, + "loss": 1.4313, + "step": 2420 + }, + { + "epoch": 0.72, + "learning_rate": 3.563689918320002e-05, + "loss": 1.4275, + "step": 2430 + }, + { + "epoch": 0.72, + "learning_rate": 3.5531455071617226e-05, + "loss": 1.421, + "step": 2440 + }, + { + "epoch": 0.73, + "learning_rate": 3.542578269615579e-05, + "loss": 1.4402, + "step": 2450 + }, + { + "epoch": 0.73, + "learning_rate": 3.5319884347210186e-05, + "loss": 1.4176, + "step": 2460 + }, + { + "epoch": 0.73, + "learning_rate": 3.521376232007271e-05, + "loss": 1.4117, + "step": 2470 + }, + { + "epoch": 0.73, + "learning_rate": 3.5107418914883794e-05, + "loss": 1.41, + "step": 2480 + }, + { + "epoch": 0.74, + "learning_rate": 3.500085643658211e-05, + "loss": 1.4313, + "step": 2490 + }, + { + "epoch": 0.74, + "learning_rate": 3.489407719485464e-05, + "loss": 1.4035, + "step": 2500 + }, + { + "epoch": 0.74, + "learning_rate": 3.4787083504086605e-05, + "loss": 1.4057, + "step": 2510 + }, + { + "epoch": 0.75, + "learning_rate": 3.467987768331127e-05, + "loss": 1.4125, + "step": 2520 + }, + { + "epoch": 0.75, + "learning_rate": 3.457246205615974e-05, + "loss": 1.4056, + "step": 2530 + }, + { + "epoch": 0.75, + "learning_rate": 3.446483895081054e-05, + "loss": 1.4082, + "step": 2540 + }, + { + "epoch": 0.76, + "learning_rate": 3.4357010699939215e-05, + "loss": 1.3915, + "step": 2550 + }, + { + "epoch": 0.76, + "learning_rate": 3.424897964066769e-05, + "loss": 1.4012, + "step": 2560 + }, + { + "epoch": 0.76, + "learning_rate": 3.4140748114513685e-05, + "loss": 1.4251, + "step": 2570 + }, + { + "epoch": 0.76, + "learning_rate": 3.403231846733994e-05, + "loss": 1.4013, + "step": 2580 + }, + { + "epoch": 0.77, + "learning_rate": 3.392369304930334e-05, + "loss": 1.4076, + "step": 2590 + }, + { + "epoch": 0.77, + "learning_rate": 3.3814874214804034e-05, + "loss": 1.3978, + "step": 2600 + }, + { + "epoch": 0.77, + "learning_rate": 3.3705864322434354e-05, + "loss": 1.408, + "step": 2610 + }, + { + "epoch": 0.78, + "learning_rate": 3.359666573492772e-05, + "loss": 1.3888, + "step": 2620 + }, + { + "epoch": 0.78, + "learning_rate": 3.3487280819107415e-05, + "loss": 1.4052, + "step": 2630 + }, + { + "epoch": 0.78, + "learning_rate": 3.33777119458353e-05, + "loss": 1.4286, + "step": 2640 + }, + { + "epoch": 0.79, + "learning_rate": 3.326796148996042e-05, + "loss": 1.4241, + "step": 2650 + }, + { + "epoch": 0.79, + "learning_rate": 3.315803183026753e-05, + "loss": 1.4049, + "step": 2660 + }, + { + "epoch": 0.79, + "learning_rate": 3.304792534942553e-05, + "loss": 1.3826, + "step": 2670 + }, + { + "epoch": 0.79, + "learning_rate": 3.293764443393582e-05, + "loss": 1.413, + "step": 2680 + }, + { + "epoch": 0.8, + "learning_rate": 3.2827191474080605e-05, + "loss": 1.4161, + "step": 2690 + }, + { + "epoch": 0.8, + "learning_rate": 3.2716568863871044e-05, + "loss": 1.382, + "step": 2700 + }, + { + "epoch": 0.8, + "learning_rate": 3.260577900099539e-05, + "loss": 1.381, + "step": 2710 + }, + { + "epoch": 0.81, + "learning_rate": 3.2494824286767e-05, + "loss": 1.396, + "step": 2720 + }, + { + "epoch": 0.81, + "learning_rate": 3.2383707126072315e-05, + "loss": 1.3923, + "step": 2730 + }, + { + "epoch": 0.81, + "learning_rate": 3.2272429927318707e-05, + "loss": 1.4044, + "step": 2740 + }, + { + "epoch": 0.81, + "learning_rate": 3.21609951023823e-05, + "loss": 1.4073, + "step": 2750 + }, + { + "epoch": 0.82, + "learning_rate": 3.204940506655568e-05, + "loss": 1.4178, + "step": 2760 + }, + { + "epoch": 0.82, + "learning_rate": 3.1937662238495544e-05, + "loss": 1.4179, + "step": 2770 + }, + { + "epoch": 0.82, + "learning_rate": 3.1825769040170285e-05, + "loss": 1.4003, + "step": 2780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1713727896807505e-05, + "loss": 1.4176, + "step": 2790 + }, + { + "epoch": 0.83, + "learning_rate": 3.160154123684143e-05, + "loss": 1.4179, + "step": 2800 + }, + { + "epoch": 0.83, + "learning_rate": 3.1489211491860276e-05, + "loss": 1.4098, + "step": 2810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1376741096553576e-05, + "loss": 1.4087, + "step": 2820 + }, + { + "epoch": 0.84, + "learning_rate": 3.126413248865935e-05, + "loss": 1.3971, + "step": 2830 + }, + { + "epoch": 0.84, + "learning_rate": 3.115138810891134e-05, + "loss": 1.3915, + "step": 2840 + }, + { + "epoch": 0.84, + "learning_rate": 3.103851040098607e-05, + "loss": 1.4041, + "step": 2850 + }, + { + "epoch": 0.85, + "learning_rate": 3.0925501811449855e-05, + "loss": 1.4129, + "step": 2860 + }, + { + "epoch": 0.85, + "learning_rate": 3.081236478970583e-05, + "loss": 1.3948, + "step": 2870 + }, + { + "epoch": 0.85, + "learning_rate": 3.069910178794082e-05, + "loss": 1.4116, + "step": 2880 + }, + { + "epoch": 0.86, + "learning_rate": 3.0585715261072206e-05, + "loss": 1.4029, + "step": 2890 + }, + { + "epoch": 0.86, + "learning_rate": 3.04722076666947e-05, + "loss": 1.399, + "step": 2900 + }, + { + "epoch": 0.86, + "learning_rate": 3.0358581465027125e-05, + "loss": 1.4061, + "step": 2910 + }, + { + "epoch": 0.87, + "learning_rate": 3.024483911885901e-05, + "loss": 1.4152, + "step": 2920 + }, + { + "epoch": 0.87, + "learning_rate": 3.013098309349729e-05, + "loss": 1.4257, + "step": 2930 + }, + { + "epoch": 0.87, + "learning_rate": 3.0017015856712814e-05, + "loss": 1.417, + "step": 2940 + }, + { + "epoch": 0.87, + "learning_rate": 2.9902939878686915e-05, + "loss": 1.3952, + "step": 2950 + }, + { + "epoch": 0.88, + "learning_rate": 2.978875763195779e-05, + "loss": 1.4252, + "step": 2960 + }, + { + "epoch": 0.88, + "learning_rate": 2.9674471591367005e-05, + "loss": 1.3982, + "step": 2970 + }, + { + "epoch": 0.88, + "learning_rate": 2.9560084234005765e-05, + "loss": 1.3948, + "step": 2980 + }, + { + "epoch": 0.89, + "learning_rate": 2.944559803916128e-05, + "loss": 1.4127, + "step": 2990 + }, + { + "epoch": 0.89, + "learning_rate": 2.9331015488263024e-05, + "loss": 1.4239, + "step": 3000 + }, + { + "epoch": 0.89, + "learning_rate": 2.9216339064828914e-05, + "loss": 1.3889, + "step": 3010 + }, + { + "epoch": 0.89, + "learning_rate": 2.910157125441152e-05, + "loss": 1.403, + "step": 3020 + }, + { + "epoch": 0.9, + "learning_rate": 2.898671454454418e-05, + "loss": 1.4106, + "step": 3030 + }, + { + "epoch": 0.9, + "learning_rate": 2.8871771424687078e-05, + "loss": 1.4123, + "step": 3040 + }, + { + "epoch": 0.9, + "learning_rate": 2.8756744386173284e-05, + "loss": 1.4137, + "step": 3050 + }, + { + "epoch": 0.91, + "learning_rate": 2.8641635922154774e-05, + "loss": 1.4009, + "step": 3060 + }, + { + "epoch": 0.91, + "learning_rate": 2.8526448527548372e-05, + "loss": 1.4159, + "step": 3070 + }, + { + "epoch": 0.91, + "learning_rate": 2.8411184698981684e-05, + "loss": 1.4071, + "step": 3080 + }, + { + "epoch": 0.92, + "learning_rate": 2.829584693473899e-05, + "loss": 1.41, + "step": 3090 + }, + { + "epoch": 0.92, + "learning_rate": 2.8180437734707064e-05, + "loss": 1.4038, + "step": 3100 + }, + { + "epoch": 0.92, + "learning_rate": 2.8064959600321043e-05, + "loss": 1.4069, + "step": 3110 + }, + { + "epoch": 0.92, + "learning_rate": 2.7949415034510163e-05, + "loss": 1.4096, + "step": 3120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7833806541643544e-05, + "loss": 1.3821, + "step": 3130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7718136627475865e-05, + "loss": 1.3886, + "step": 3140 + }, + { + "epoch": 0.93, + "learning_rate": 2.76024077990931e-05, + "loss": 1.3923, + "step": 3150 + }, + { + "epoch": 0.94, + "learning_rate": 2.748662256485816e-05, + "loss": 1.4072, + "step": 3160 + }, + { + "epoch": 0.94, + "learning_rate": 2.7370783434356512e-05, + "loss": 1.4126, + "step": 3170 + }, + { + "epoch": 0.94, + "learning_rate": 2.7254892918341802e-05, + "loss": 1.4238, + "step": 3180 + }, + { + "epoch": 0.95, + "learning_rate": 2.713895352868144e-05, + "loss": 1.4183, + "step": 3190 + }, + { + "epoch": 0.95, + "learning_rate": 2.702296777830212e-05, + "loss": 1.4056, + "step": 3200 + }, + { + "epoch": 0.95, + "learning_rate": 2.6906938181135423e-05, + "loss": 1.4096, + "step": 3210 + }, + { + "epoch": 0.95, + "learning_rate": 2.6790867252063247e-05, + "loss": 1.4018, + "step": 3220 + }, + { + "epoch": 0.96, + "learning_rate": 2.6674757506863357e-05, + "loss": 1.3922, + "step": 3230 + }, + { + "epoch": 0.96, + "learning_rate": 2.655861146215483e-05, + "loss": 1.4054, + "step": 3240 + }, + { + "epoch": 0.96, + "learning_rate": 2.6442431635343528e-05, + "loss": 1.3914, + "step": 3250 + }, + { + "epoch": 0.97, + "learning_rate": 2.6326220544567514e-05, + "loss": 1.3851, + "step": 3260 + }, + { + "epoch": 0.97, + "learning_rate": 2.620998070864248e-05, + "loss": 1.4102, + "step": 3270 + }, + { + "epoch": 0.97, + "learning_rate": 2.6093714647007156e-05, + "loss": 1.4069, + "step": 3280 + }, + { + "epoch": 0.97, + "learning_rate": 2.5977424879668705e-05, + "loss": 1.3919, + "step": 3290 + }, + { + "epoch": 0.98, + "learning_rate": 2.5861113927148096e-05, + "loss": 1.4073, + "step": 3300 + }, + { + "epoch": 0.98, + "learning_rate": 2.5744784310425467e-05, + "loss": 1.4025, + "step": 3310 + }, + { + "epoch": 0.98, + "learning_rate": 2.562843855088551e-05, + "loss": 1.3805, + "step": 3320 + }, + { + "epoch": 0.99, + "learning_rate": 2.5512079170262793e-05, + "loss": 1.4032, + "step": 3330 + }, + { + "epoch": 0.99, + "learning_rate": 2.5395708690587117e-05, + "loss": 1.4232, + "step": 3340 + }, + { + "epoch": 0.99, + "learning_rate": 2.527932963412885e-05, + "loss": 1.3897, + "step": 3350 + }, + { + "epoch": 1.0, + "learning_rate": 2.5162944523344256e-05, + "loss": 1.4008, + "step": 3360 + }, + { + "epoch": 1.0, + "learning_rate": 2.5046555880820826e-05, + "loss": 1.3936, + "step": 3370 + }, + { + "epoch": 1.0, + "learning_rate": 2.4930166229222597e-05, + "loss": 1.394, + "step": 3380 + }, + { + "epoch": 1.0, + "learning_rate": 2.481377809123547e-05, + "loss": 1.3903, + "step": 3390 + }, + { + "epoch": 1.01, + "learning_rate": 2.469739398951256e-05, + "loss": 1.3869, + "step": 3400 + }, + { + "epoch": 1.01, + "learning_rate": 2.458101644661947e-05, + "loss": 1.429, + "step": 3410 + }, + { + "epoch": 1.01, + "learning_rate": 2.4464647984979667e-05, + "loss": 1.3987, + "step": 3420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4348291126819783e-05, + "loss": 1.38, + "step": 3430 + }, + { + "epoch": 1.02, + "learning_rate": 2.4231948394114936e-05, + "loss": 1.3906, + "step": 3440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4115622308534096e-05, + "loss": 1.3931, + "step": 3450 + }, + { + "epoch": 1.03, + "learning_rate": 2.399931539138541e-05, + "loss": 1.4135, + "step": 3460 + }, + { + "epoch": 1.03, + "learning_rate": 2.388303016356156e-05, + "loss": 1.3952, + "step": 3470 + }, + { + "epoch": 1.03, + "learning_rate": 2.3766769145485125e-05, + "loss": 1.3972, + "step": 3480 + }, + { + "epoch": 1.03, + "learning_rate": 2.3650534857053943e-05, + "loss": 1.3937, + "step": 3490 + }, + { + "epoch": 1.04, + "learning_rate": 2.3534329817586513e-05, + "loss": 1.3936, + "step": 3500 + }, + { + "epoch": 1.04, + "learning_rate": 2.3418156545767365e-05, + "loss": 1.397, + "step": 3510 + }, + { + "epoch": 1.04, + "learning_rate": 2.3302017559592494e-05, + "loss": 1.3849, + "step": 3520 + }, + { + "epoch": 1.05, + "learning_rate": 2.318591537631476e-05, + "loss": 1.4118, + "step": 3530 + }, + { + "epoch": 1.05, + "learning_rate": 2.3069852512389335e-05, + "loss": 1.414, + "step": 3540 + }, + { + "epoch": 1.05, + "learning_rate": 2.2953831483419184e-05, + "loss": 1.4088, + "step": 3550 + }, + { + "epoch": 1.05, + "learning_rate": 2.2837854804100504e-05, + "loss": 1.3773, + "step": 3560 + }, + { + "epoch": 1.06, + "learning_rate": 2.272192498816825e-05, + "loss": 1.3977, + "step": 3570 + }, + { + "epoch": 1.06, + "learning_rate": 2.260604454834162e-05, + "loss": 1.3591, + "step": 3580 + }, + { + "epoch": 1.06, + "learning_rate": 2.2490215996269617e-05, + "loss": 1.4023, + "step": 3590 + }, + { + "epoch": 1.07, + "learning_rate": 2.237444184247661e-05, + "loss": 1.3873, + "step": 3600 + }, + { + "epoch": 1.07, + "learning_rate": 2.2258724596307915e-05, + "loss": 1.3826, + "step": 3610 + }, + { + "epoch": 1.07, + "learning_rate": 2.214306676587539e-05, + "loss": 1.3732, + "step": 3620 + }, + { + "epoch": 1.08, + "learning_rate": 2.2027470858003098e-05, + "loss": 1.3988, + "step": 3630 + }, + { + "epoch": 1.08, + "learning_rate": 2.1911939378172956e-05, + "loss": 1.4036, + "step": 3640 + }, + { + "epoch": 1.08, + "learning_rate": 2.1796474830470447e-05, + "loss": 1.4236, + "step": 3650 + }, + { + "epoch": 1.08, + "learning_rate": 2.1681079717530328e-05, + "loss": 1.4032, + "step": 3660 + }, + { + "epoch": 1.09, + "learning_rate": 2.156575654048239e-05, + "loss": 1.39, + "step": 3670 + }, + { + "epoch": 1.09, + "learning_rate": 2.145050779889725e-05, + "loss": 1.3757, + "step": 3680 + }, + { + "epoch": 1.09, + "learning_rate": 2.1335335990732186e-05, + "loss": 1.3934, + "step": 3690 + }, + { + "epoch": 1.1, + "learning_rate": 2.1220243612276964e-05, + "loss": 1.3979, + "step": 3700 + }, + { + "epoch": 1.1, + "learning_rate": 2.110523315809978e-05, + "loss": 1.4181, + "step": 3710 + }, + { + "epoch": 1.1, + "learning_rate": 2.0990307120993134e-05, + "loss": 1.406, + "step": 3720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0875467991919854e-05, + "loss": 1.4036, + "step": 3730 + }, + { + "epoch": 1.11, + "learning_rate": 2.076071825995906e-05, + "loss": 1.4095, + "step": 3740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0646060412252246e-05, + "loss": 1.4048, + "step": 3750 + }, + { + "epoch": 1.11, + "learning_rate": 2.0531496933949363e-05, + "loss": 1.3874, + "step": 3760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417030308154953e-05, + "loss": 1.3793, + "step": 3770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0302663015874322e-05, + "loss": 1.4152, + "step": 3780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0188397535959785e-05, + "loss": 1.3738, + "step": 3790 + }, + { + "epoch": 1.13, + "learning_rate": 2.007423634505692e-05, + "loss": 1.4033, + "step": 3800 + }, + { + "epoch": 1.13, + "learning_rate": 1.9960181917550897e-05, + "loss": 1.3753, + "step": 3810 + }, + { + "epoch": 1.13, + "learning_rate": 1.9846236725512835e-05, + "loss": 1.3791, + "step": 3820 + }, + { + "epoch": 1.13, + "learning_rate": 1.973240323864624e-05, + "loss": 1.3837, + "step": 3830 + }, + { + "epoch": 1.14, + "learning_rate": 1.9618683924233467e-05, + "loss": 1.3945, + "step": 3840 + }, + { + "epoch": 1.14, + "learning_rate": 1.9505081247082237e-05, + "loss": 1.3804, + "step": 3850 + }, + { + "epoch": 1.14, + "learning_rate": 1.9391597669472213e-05, + "loss": 1.3964, + "step": 3860 + }, + { + "epoch": 1.15, + "learning_rate": 1.927823565110165e-05, + "loss": 1.3983, + "step": 3870 + }, + { + "epoch": 1.15, + "learning_rate": 1.9164997649034058e-05, + "loss": 1.4169, + "step": 3880 + }, + { + "epoch": 1.15, + "learning_rate": 1.9051886117644963e-05, + "loss": 1.4101, + "step": 3890 + }, + { + "epoch": 1.16, + "learning_rate": 1.89389035085687e-05, + "loss": 1.3823, + "step": 3900 + }, + { + "epoch": 1.16, + "learning_rate": 1.8826052270645276e-05, + "loss": 1.3827, + "step": 3910 + }, + { + "epoch": 1.16, + "learning_rate": 1.8713334849867315e-05, + "loss": 1.4035, + "step": 3920 + }, + { + "epoch": 1.16, + "learning_rate": 1.8600753689327e-05, + "loss": 1.4081, + "step": 3930 + }, + { + "epoch": 1.17, + "learning_rate": 1.8488311229163152e-05, + "loss": 1.3919, + "step": 3940 + }, + { + "epoch": 1.17, + "learning_rate": 1.8376009906508338e-05, + "loss": 1.3854, + "step": 3950 + }, + { + "epoch": 1.17, + "learning_rate": 1.826385215543603e-05, + "loss": 1.3924, + "step": 3960 + }, + { + "epoch": 1.18, + "learning_rate": 1.8151840406907873e-05, + "loss": 1.3851, + "step": 3970 + }, + { + "epoch": 1.18, + "learning_rate": 1.8039977088720972e-05, + "loss": 1.3707, + "step": 3980 + }, + { + "epoch": 1.18, + "learning_rate": 1.7928264625455282e-05, + "loss": 1.3998, + "step": 3990 + }, + { + "epoch": 1.19, + "learning_rate": 1.7816705438421064e-05, + "loss": 1.3931, + "step": 4000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7705301945606384e-05, + "loss": 1.3976, + "step": 4010 + }, + { + "epoch": 1.19, + "learning_rate": 1.7594056561624716e-05, + "loss": 1.3785, + "step": 4020 + }, + { + "epoch": 1.19, + "learning_rate": 1.748297169766262e-05, + "loss": 1.3845, + "step": 4030 + }, + { + "epoch": 1.2, + "learning_rate": 1.7372049761427457e-05, + "loss": 1.3926, + "step": 4040 + }, + { + "epoch": 1.2, + "learning_rate": 1.7261293157095204e-05, + "loss": 1.4075, + "step": 4050 + }, + { + "epoch": 1.2, + "learning_rate": 1.7150704285258375e-05, + "loss": 1.3938, + "step": 4060 + }, + { + "epoch": 1.21, + "learning_rate": 1.7040285542873945e-05, + "loss": 1.3884, + "step": 4070 + }, + { + "epoch": 1.21, + "learning_rate": 1.6930039323211448e-05, + "loss": 1.4066, + "step": 4080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6819968015801048e-05, + "loss": 1.3992, + "step": 4090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6710074006381797e-05, + "loss": 1.4066, + "step": 4100 + }, + { + "epoch": 1.22, + "learning_rate": 1.6600359676849892e-05, + "loss": 1.4076, + "step": 4110 + }, + { + "epoch": 1.22, + "learning_rate": 1.6490827405207062e-05, + "loss": 1.4078, + "step": 4120 + }, + { + "epoch": 1.22, + "learning_rate": 1.638147956550904e-05, + "loss": 1.4026, + "step": 4130 + }, + { + "epoch": 1.23, + "learning_rate": 1.627231852781407e-05, + "loss": 1.3861, + "step": 4140 + }, + { + "epoch": 1.23, + "learning_rate": 1.6163346658131567e-05, + "loss": 1.3915, + "step": 4150 + }, + { + "epoch": 1.23, + "learning_rate": 1.6054566318370832e-05, + "loss": 1.3828, + "step": 4160 + }, + { + "epoch": 1.24, + "learning_rate": 1.5945979866289844e-05, + "loss": 1.3952, + "step": 4170 + }, + { + "epoch": 1.24, + "learning_rate": 1.583758965544417e-05, + "loss": 1.3892, + "step": 4180 + }, + { + "epoch": 1.24, + "learning_rate": 1.5729398035135957e-05, + "loss": 1.3973, + "step": 4190 + }, + { + "epoch": 1.24, + "learning_rate": 1.5621407350362986e-05, + "loss": 1.4225, + "step": 4200 + }, + { + "epoch": 1.25, + "learning_rate": 1.5513619941767886e-05, + "loss": 1.3948, + "step": 4210 + }, + { + "epoch": 1.25, + "learning_rate": 1.540603814558736e-05, + "loss": 1.4074, + "step": 4220 + }, + { + "epoch": 1.25, + "learning_rate": 1.5298664293601574e-05, + "loss": 1.3965, + "step": 4230 + }, + { + "epoch": 1.26, + "learning_rate": 1.5191500713083615e-05, + "loss": 1.3743, + "step": 4240 + }, + { + "epoch": 1.26, + "learning_rate": 1.508454972674904e-05, + "loss": 1.384, + "step": 4250 + }, + { + "epoch": 1.26, + "learning_rate": 1.4977813652705535e-05, + "loss": 1.4018, + "step": 4260 + }, + { + "epoch": 1.27, + "learning_rate": 1.4871294804402675e-05, + "loss": 1.3904, + "step": 4270 + }, + { + "epoch": 1.27, + "learning_rate": 1.4764995490581779e-05, + "loss": 1.3981, + "step": 4280 + }, + { + "epoch": 1.27, + "learning_rate": 1.465891801522587e-05, + "loss": 1.4144, + "step": 4290 + }, + { + "epoch": 1.27, + "learning_rate": 1.4553064677509731e-05, + "loss": 1.4172, + "step": 4300 + }, + { + "epoch": 1.28, + "learning_rate": 1.4447437771750078e-05, + "loss": 1.3873, + "step": 4310 + }, + { + "epoch": 1.28, + "learning_rate": 1.4342039587355832e-05, + "loss": 1.3983, + "step": 4320 + }, + { + "epoch": 1.28, + "learning_rate": 1.423687240877849e-05, + "loss": 1.4007, + "step": 4330 + }, + { + "epoch": 1.29, + "learning_rate": 1.4131938515462639e-05, + "loss": 1.4088, + "step": 4340 + }, + { + "epoch": 1.29, + "learning_rate": 1.4027240181796508e-05, + "loss": 1.3941, + "step": 4350 + }, + { + "epoch": 1.29, + "learning_rate": 1.3922779677062689e-05, + "loss": 1.3975, + "step": 4360 + }, + { + "epoch": 1.29, + "learning_rate": 1.3818559265388964e-05, + "loss": 1.3842, + "step": 4370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3714581205699214e-05, + "loss": 1.4011, + "step": 4380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3610847751664473e-05, + "loss": 1.3881, + "step": 4390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3507361151654067e-05, + "loss": 1.4028, + "step": 4400 + }, + { + "epoch": 1.31, + "learning_rate": 1.340412364868689e-05, + "loss": 1.3973, + "step": 4410 + }, + { + "epoch": 1.31, + "learning_rate": 1.3301137480382786e-05, + "loss": 1.445, + "step": 4420 + }, + { + "epoch": 1.31, + "learning_rate": 1.3198404878914044e-05, + "loss": 1.3957, + "step": 4430 + }, + { + "epoch": 1.32, + "learning_rate": 1.3095928070957037e-05, + "loss": 1.395, + "step": 4440 + }, + { + "epoch": 1.32, + "learning_rate": 1.2993709277643922e-05, + "loss": 1.4157, + "step": 4450 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891750714514545e-05, + "loss": 1.4074, + "step": 4460 + }, + { + "epoch": 1.32, + "learning_rate": 1.2790054591468381e-05, + "loss": 1.3988, + "step": 4470 + }, + { + "epoch": 1.33, + "learning_rate": 1.2688623112716652e-05, + "loss": 1.3914, + "step": 4480 + }, + { + "epoch": 1.33, + "learning_rate": 1.2587458476734559e-05, + "loss": 1.3864, + "step": 4490 + }, + { + "epoch": 1.33, + "learning_rate": 1.248656287621362e-05, + "loss": 1.3934, + "step": 4500 + }, + { + "epoch": 1.34, + "learning_rate": 1.2385938498014138e-05, + "loss": 1.3893, + "step": 4510 + }, + { + "epoch": 1.34, + "learning_rate": 1.2285587523117825e-05, + "loss": 1.3991, + "step": 4520 + }, + { + "epoch": 1.34, + "learning_rate": 1.2185512126580512e-05, + "loss": 1.376, + "step": 4530 + }, + { + "epoch": 1.35, + "learning_rate": 1.2085714477484997e-05, + "loss": 1.3799, + "step": 4540 + }, + { + "epoch": 1.35, + "learning_rate": 1.1986196738894078e-05, + "loss": 1.3738, + "step": 4550 + }, + { + "epoch": 1.35, + "learning_rate": 1.188696106780361e-05, + "loss": 1.3754, + "step": 4560 + }, + { + "epoch": 1.35, + "learning_rate": 1.178800961509578e-05, + "loss": 1.4006, + "step": 4570 + }, + { + "epoch": 1.36, + "learning_rate": 1.1689344525492497e-05, + "loss": 1.4012, + "step": 4580 + }, + { + "epoch": 1.36, + "learning_rate": 1.1590967937508895e-05, + "loss": 1.3973, + "step": 4590 + }, + { + "epoch": 1.36, + "learning_rate": 1.149288198340698e-05, + "loss": 1.3737, + "step": 4600 + }, + { + "epoch": 1.37, + "learning_rate": 1.1395088789149419e-05, + "loss": 1.3998, + "step": 4610 + }, + { + "epoch": 1.37, + "learning_rate": 1.1297590474353464e-05, + "loss": 1.4053, + "step": 4620 + }, + { + "epoch": 1.37, + "learning_rate": 1.1200389152245003e-05, + "loss": 1.4038, + "step": 4630 + }, + { + "epoch": 1.37, + "learning_rate": 1.1103486929612759e-05, + "loss": 1.3968, + "step": 4640 + }, + { + "epoch": 1.38, + "learning_rate": 1.1006885906762626e-05, + "loss": 1.4037, + "step": 4650 + }, + { + "epoch": 1.38, + "learning_rate": 1.0910588177472153e-05, + "loss": 1.3901, + "step": 4660 + }, + { + "epoch": 1.38, + "learning_rate": 1.0814595828945154e-05, + "loss": 1.379, + "step": 4670 + }, + { + "epoch": 1.39, + "learning_rate": 1.0718910941766478e-05, + "loss": 1.3808, + "step": 4680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0623535589856887e-05, + "loss": 1.4105, + "step": 4690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0528471840428142e-05, + "loss": 1.3756, + "step": 4700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0433721753938182e-05, + "loss": 1.3708, + "step": 4710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0339287384046462e-05, + "loss": 1.3924, + "step": 4720 + }, + { + "epoch": 1.4, + "learning_rate": 1.024517077756943e-05, + "loss": 1.3854, + "step": 4730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0151373974436184e-05, + "loss": 1.3908, + "step": 4740 + }, + { + "epoch": 1.41, + "learning_rate": 1.0057899007644245e-05, + "loss": 1.3953, + "step": 4750 + }, + { + "epoch": 1.41, + "learning_rate": 9.964747903215513e-06, + "loss": 1.3933, + "step": 4760 + }, + { + "epoch": 1.41, + "learning_rate": 9.871922680152318e-06, + "loss": 1.3854, + "step": 4770 + }, + { + "epoch": 1.42, + "learning_rate": 9.779425350393685e-06, + "loss": 1.4026, + "step": 4780 + }, + { + "epoch": 1.42, + "learning_rate": 9.687257918771719e-06, + "loss": 1.3958, + "step": 4790 + }, + { + "epoch": 1.42, + "learning_rate": 9.595422382968156e-06, + "loss": 1.3777, + "step": 4800 + }, + { + "epoch": 1.43, + "learning_rate": 9.503920733471052e-06, + "loss": 1.3835, + "step": 4810 + }, + { + "epoch": 1.43, + "learning_rate": 9.412754953531663e-06, + "loss": 1.3768, + "step": 4820 + }, + { + "epoch": 1.43, + "learning_rate": 9.321927019121435e-06, + "loss": 1.3846, + "step": 4830 + }, + { + "epoch": 1.43, + "learning_rate": 9.231438898889184e-06, + "loss": 1.3878, + "step": 4840 + }, + { + "epoch": 1.44, + "learning_rate": 9.141292554118435e-06, + "loss": 1.38, + "step": 4850 + }, + { + "epoch": 1.44, + "learning_rate": 9.051489938684903e-06, + "loss": 1.3841, + "step": 4860 + }, + { + "epoch": 1.44, + "learning_rate": 8.962032999014144e-06, + "loss": 1.4122, + "step": 4870 + }, + { + "epoch": 1.45, + "learning_rate": 8.87292367403937e-06, + "loss": 1.3839, + "step": 4880 + }, + { + "epoch": 1.45, + "learning_rate": 8.784163895159428e-06, + "loss": 1.3932, + "step": 4890 + }, + { + "epoch": 1.45, + "learning_rate": 8.695755586196924e-06, + "loss": 1.4012, + "step": 4900 + }, + { + "epoch": 1.45, + "learning_rate": 8.607700663356543e-06, + "loss": 1.3931, + "step": 4910 + }, + { + "epoch": 1.46, + "learning_rate": 8.520001035183503e-06, + "loss": 1.4003, + "step": 4920 + }, + { + "epoch": 1.46, + "learning_rate": 8.432658602522193e-06, + "loss": 1.4064, + "step": 4930 + }, + { + "epoch": 1.46, + "learning_rate": 8.345675258474969e-06, + "loss": 1.383, + "step": 4940 + }, + { + "epoch": 1.47, + "learning_rate": 8.259052888361132e-06, + "loss": 1.4147, + "step": 4950 + }, + { + "epoch": 1.47, + "learning_rate": 8.172793369676052e-06, + "loss": 1.4064, + "step": 4960 + }, + { + "epoch": 1.47, + "learning_rate": 8.086898572050494e-06, + "loss": 1.3894, + "step": 4970 + }, + { + "epoch": 1.48, + "learning_rate": 8.00137035721007e-06, + "loss": 1.3928, + "step": 4980 + }, + { + "epoch": 1.48, + "learning_rate": 7.916210578934896e-06, + "loss": 1.4049, + "step": 4990 + }, + { + "epoch": 1.48, + "learning_rate": 7.831421083019422e-06, + "loss": 1.402, + "step": 5000 + } + ], + "max_steps": 6748, + "num_train_epochs": 2, + "total_flos": 8.656312949708685e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..406ab5b628f223bfcd63d70185fb1bc0973e19c4 --- /dev/null +++ b/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77681af64e1f04ae2b28b063de632629c209cd2338ce2449c3e014f309b6088a +size 3298 diff --git a/checkpoint-6000/README.md b/checkpoint-6000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-6000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-6000/adapter_config.json b/checkpoint-6000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..a092508f49b7debf23eb22091bdac4ac1daa62a9 --- /dev/null +++ b/checkpoint-6000/adapter_config.json @@ -0,0 +1,20 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "../Baichuan-13B-Chat", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "W_pack" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6000/adapter_model.bin b/checkpoint-6000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a184feace4ff3257a7afb75c38aad196e7e4b29d --- /dev/null +++ b/checkpoint-6000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a0d941d2b48dd222a5027a74711e6041527c31eef67ab8fd5dbc6c62283f8d7 +size 26241825 diff --git a/checkpoint-6000/finetuning_args.json b/checkpoint-6000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..fafc24bcd05e0bda0201b5a7198b067dab53f435 --- /dev/null +++ b/checkpoint-6000/finetuning_args.json @@ -0,0 +1,12 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "W_pack" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/checkpoint-6000/optimizer.pt b/checkpoint-6000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..4e1749b0c4b3dfc0db9e50db16a1d319ab1f7445 --- /dev/null +++ b/checkpoint-6000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2e37717aeaf3d8a34006919e8086948e92f01af4a6d86688f68d9f665d1275df +size 52496005 diff --git a/checkpoint-6000/rng_state_0.pth b/checkpoint-6000/rng_state_0.pth new file mode 100644 index 0000000000000000000000000000000000000000..66fcb503b38e86ce0b924b1a8654b0306165fa58 --- /dev/null +++ b/checkpoint-6000/rng_state_0.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ed99833de5db065aca73142d56dc6866df9f39febf6d3de4647cf37052efa1c +size 18679 diff --git a/checkpoint-6000/rng_state_1.pth b/checkpoint-6000/rng_state_1.pth new file mode 100644 index 0000000000000000000000000000000000000000..83201d00f84ae26bcad9ee847644d258b4725eaf --- /dev/null +++ b/checkpoint-6000/rng_state_1.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf1ce9f1b2d2f26872e2ea474f95dc0af9402367a7a02a3d55b55dd33b8cdc09 +size 18679 diff --git a/checkpoint-6000/rng_state_2.pth b/checkpoint-6000/rng_state_2.pth new file mode 100644 index 0000000000000000000000000000000000000000..a3a4cf006af5682e433a99e6a561c470223f2ef4 --- /dev/null +++ b/checkpoint-6000/rng_state_2.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:70bb1e23d16eb0b67db72eb9f5cbff266bc4f24c507340a591c53bf6d98c8abf +size 18679 diff --git a/checkpoint-6000/scheduler.pt b/checkpoint-6000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c9b53c780db2ff8805df023d908b7c059c692506 --- /dev/null +++ b/checkpoint-6000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2b105c134b7bd4c25d55b9d4176ff9abd8731bf1989b02b59d3cd12aa47e5e8 +size 627 diff --git a/checkpoint-6000/trainer_state.json b/checkpoint-6000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..23a26c2ad30023a07640334c56e67c0f65db9a87 --- /dev/null +++ b/checkpoint-6000/trainer_state.json @@ -0,0 +1,3616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7779094747759094, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999729068921297e-05, + "loss": 1.8898, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998916281557476e-05, + "loss": 1.7273, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999756165552527e-05, + "loss": 1.6799, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566522018553e-05, + "loss": 1.6431, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322701664249e-05, + "loss": 1.6153, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990247097742984e-05, + "loss": 1.5933, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.9986725528075205e-05, + "loss": 1.5913, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.998266238396737e-05, + "loss": 1.5434, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 4.997805775348605e-05, + "loss": 1.5304, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 4.997291173643424e-05, + "loss": 1.5531, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 4.996722444434921e-05, + "loss": 1.5446, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 4.99609960005001e-05, + "loss": 1.5352, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 4.995422653988524e-05, + "loss": 1.5303, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 4.994691620922919e-05, + "loss": 1.5449, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 4.993906516697964e-05, + "loss": 1.5114, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9930673583303865e-05, + "loss": 1.5043, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 4.992174164008515e-05, + "loss": 1.5476, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 4.991226953091877e-05, + "loss": 1.5107, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 4.9902257461107824e-05, + "loss": 1.5104, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 4.9891705647658795e-05, + "loss": 1.5298, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 4.988061431927681e-05, + "loss": 1.4907, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 4.986898371636071e-05, + "loss": 1.5127, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 4.985681409099784e-05, + "loss": 1.5037, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 4.984410570695858e-05, + "loss": 1.5029, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 4.983085883969063e-05, + "loss": 1.4725, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 4.981707377631303e-05, + "loss": 1.5148, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 4.9802750815609936e-05, + "loss": 1.4993, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 4.978789026802419e-05, + "loss": 1.5006, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 4.9772492455650494e-05, + "loss": 1.4885, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 4.975655771222855e-05, + "loss": 1.4898, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 4.9740086383135706e-05, + "loss": 1.4906, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 4.97230788253796e-05, + "loss": 1.4796, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 4.970553540759028e-05, + "loss": 1.4861, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 4.968745651001231e-05, + "loss": 1.4827, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9668842524496526e-05, + "loss": 1.4884, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 4.964969385449149e-05, + "loss": 1.4873, + "step": 360 + }, + { + "epoch": 0.11, + "learning_rate": 4.96300109150348e-05, + "loss": 1.4848, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 4.960979413274404e-05, + "loss": 1.4881, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 4.9589043945807594e-05, + "loss": 1.4618, + "step": 390 + }, + { + "epoch": 0.12, + "learning_rate": 4.9567760803975105e-05, + "loss": 1.4858, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 4.954594516854773e-05, + "loss": 1.4777, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 4.952359751236817e-05, + "loss": 1.4828, + "step": 420 + }, + { + "epoch": 0.13, + "learning_rate": 4.950071831981038e-05, + "loss": 1.4571, + "step": 430 + }, + { + "epoch": 0.13, + "learning_rate": 4.9477308086769117e-05, + "loss": 1.4724, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 4.945336732064915e-05, + "loss": 1.4771, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 4.9428896540354294e-05, + "loss": 1.4604, + "step": 460 + }, + { + "epoch": 0.14, + "learning_rate": 4.940389627627613e-05, + "loss": 1.4815, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 4.937836707028255e-05, + "loss": 1.4859, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 4.935230947570597e-05, + "loss": 1.4715, + "step": 490 + }, + { + "epoch": 0.15, + "learning_rate": 4.932572405733137e-05, + "loss": 1.4759, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 4.929861139138404e-05, + "loss": 1.4678, + "step": 510 + }, + { + "epoch": 0.15, + "learning_rate": 4.9270972065517083e-05, + "loss": 1.4754, + "step": 520 + }, + { + "epoch": 0.16, + "learning_rate": 4.924280667879869e-05, + "loss": 1.462, + "step": 530 + }, + { + "epoch": 0.16, + "learning_rate": 4.921411584169915e-05, + "loss": 1.4704, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 4.918490017607761e-05, + "loss": 1.4661, + "step": 550 + }, + { + "epoch": 0.17, + "learning_rate": 4.915516031516863e-05, + "loss": 1.471, + "step": 560 + }, + { + "epoch": 0.17, + "learning_rate": 4.912489690356841e-05, + "loss": 1.451, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 4.909411059722084e-05, + "loss": 1.4411, + "step": 580 + }, + { + "epoch": 0.17, + "learning_rate": 4.9062802063403316e-05, + "loss": 1.456, + "step": 590 + }, + { + "epoch": 0.18, + "learning_rate": 4.90309719807122e-05, + "loss": 1.4678, + "step": 600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8998621039048205e-05, + "loss": 1.479, + "step": 610 + }, + { + "epoch": 0.18, + "learning_rate": 4.896574993960136e-05, + "loss": 1.4471, + "step": 620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893235939483587e-05, + "loss": 1.453, + "step": 630 + }, + { + "epoch": 0.19, + "learning_rate": 4.8898450128474626e-05, + "loss": 1.4696, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 4.886402287548357e-05, + "loss": 1.4526, + "step": 650 + }, + { + "epoch": 0.2, + "learning_rate": 4.8829078382055725e-05, + "loss": 1.4429, + "step": 660 + }, + { + "epoch": 0.2, + "learning_rate": 4.8793617405595025e-05, + "loss": 1.4491, + "step": 670 + }, + { + "epoch": 0.2, + "learning_rate": 4.8757640714699924e-05, + "loss": 1.4411, + "step": 680 + }, + { + "epoch": 0.2, + "learning_rate": 4.872114908914671e-05, + "loss": 1.4543, + "step": 690 + }, + { + "epoch": 0.21, + "learning_rate": 4.8684143319872636e-05, + "loss": 1.4556, + "step": 700 + }, + { + "epoch": 0.21, + "learning_rate": 4.864662420895873e-05, + "loss": 1.4506, + "step": 710 + }, + { + "epoch": 0.21, + "learning_rate": 4.860859256961244e-05, + "loss": 1.4671, + "step": 720 + }, + { + "epoch": 0.22, + "learning_rate": 4.857004922615002e-05, + "loss": 1.4469, + "step": 730 + }, + { + "epoch": 0.22, + "learning_rate": 4.8530995013978645e-05, + "loss": 1.4554, + "step": 740 + }, + { + "epoch": 0.22, + "learning_rate": 4.84914307795783e-05, + "loss": 1.4671, + "step": 750 + }, + { + "epoch": 0.23, + "learning_rate": 4.845135738048343e-05, + "loss": 1.445, + "step": 760 + }, + { + "epoch": 0.23, + "learning_rate": 4.841077568526439e-05, + "loss": 1.4469, + "step": 770 + }, + { + "epoch": 0.23, + "learning_rate": 4.836968657350857e-05, + "loss": 1.4677, + "step": 780 + }, + { + "epoch": 0.23, + "learning_rate": 4.832809093580135e-05, + "loss": 1.4653, + "step": 790 + }, + { + "epoch": 0.24, + "learning_rate": 4.8285989673706826e-05, + "loss": 1.4342, + "step": 800 + }, + { + "epoch": 0.24, + "learning_rate": 4.824338369974822e-05, + "loss": 1.458, + "step": 810 + }, + { + "epoch": 0.24, + "learning_rate": 4.8200273937388126e-05, + "loss": 1.4541, + "step": 820 + }, + { + "epoch": 0.25, + "learning_rate": 4.81566613210085e-05, + "loss": 1.4324, + "step": 830 + }, + { + "epoch": 0.25, + "learning_rate": 4.81125467958904e-05, + "loss": 1.4405, + "step": 840 + }, + { + "epoch": 0.25, + "learning_rate": 4.80679313181935e-05, + "loss": 1.4408, + "step": 850 + }, + { + "epoch": 0.25, + "learning_rate": 4.8022815854935356e-05, + "loss": 1.4395, + "step": 860 + }, + { + "epoch": 0.26, + "learning_rate": 4.797720138397045e-05, + "loss": 1.4359, + "step": 870 + }, + { + "epoch": 0.26, + "learning_rate": 4.793108889396902e-05, + "loss": 1.442, + "step": 880 + }, + { + "epoch": 0.26, + "learning_rate": 4.7884479384395594e-05, + "loss": 1.4566, + "step": 890 + }, + { + "epoch": 0.27, + "learning_rate": 4.7837373865487345e-05, + "loss": 1.4257, + "step": 900 + }, + { + "epoch": 0.27, + "learning_rate": 4.77897733582322e-05, + "loss": 1.4755, + "step": 910 + }, + { + "epoch": 0.27, + "learning_rate": 4.774167889434671e-05, + "loss": 1.4476, + "step": 920 + }, + { + "epoch": 0.28, + "learning_rate": 4.769309151625366e-05, + "loss": 1.4531, + "step": 930 + }, + { + "epoch": 0.28, + "learning_rate": 4.7644012277059516e-05, + "loss": 1.447, + "step": 940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7594442240531574e-05, + "loss": 1.4201, + "step": 950 + }, + { + "epoch": 0.28, + "learning_rate": 4.754438248107491e-05, + "loss": 1.4323, + "step": 960 + }, + { + "epoch": 0.29, + "learning_rate": 4.7493834083709104e-05, + "loss": 1.4432, + "step": 970 + }, + { + "epoch": 0.29, + "learning_rate": 4.7442798144044695e-05, + "loss": 1.4339, + "step": 980 + }, + { + "epoch": 0.29, + "learning_rate": 4.739127576825945e-05, + "loss": 1.4477, + "step": 990 + }, + { + "epoch": 0.3, + "learning_rate": 4.733926807307441e-05, + "loss": 1.4242, + "step": 1000 + }, + { + "epoch": 0.3, + "learning_rate": 4.728677618572965e-05, + "loss": 1.4341, + "step": 1010 + }, + { + "epoch": 0.3, + "learning_rate": 4.723380124395985e-05, + "loss": 1.4526, + "step": 1020 + }, + { + "epoch": 0.31, + "learning_rate": 4.7180344395969675e-05, + "loss": 1.4402, + "step": 1030 + }, + { + "epoch": 0.31, + "learning_rate": 4.712640680040884e-05, + "loss": 1.4257, + "step": 1040 + }, + { + "epoch": 0.31, + "learning_rate": 4.707198962634701e-05, + "loss": 1.4232, + "step": 1050 + }, + { + "epoch": 0.31, + "learning_rate": 4.70170940532485e-05, + "loss": 1.4485, + "step": 1060 + }, + { + "epoch": 0.32, + "learning_rate": 4.6961721270946635e-05, + "loss": 1.456, + "step": 1070 + }, + { + "epoch": 0.32, + "learning_rate": 4.690587247961804e-05, + "loss": 1.4555, + "step": 1080 + }, + { + "epoch": 0.32, + "learning_rate": 4.684954888975657e-05, + "loss": 1.4376, + "step": 1090 + }, + { + "epoch": 0.33, + "learning_rate": 4.6792751722147104e-05, + "loss": 1.4353, + "step": 1100 + }, + { + "epoch": 0.33, + "learning_rate": 4.6735482207839074e-05, + "loss": 1.4226, + "step": 1110 + }, + { + "epoch": 0.33, + "learning_rate": 4.6677741588119784e-05, + "loss": 1.4315, + "step": 1120 + }, + { + "epoch": 0.33, + "learning_rate": 4.66195311144875e-05, + "loss": 1.4303, + "step": 1130 + }, + { + "epoch": 0.34, + "learning_rate": 4.6560852048624345e-05, + "loss": 1.4288, + "step": 1140 + }, + { + "epoch": 0.34, + "learning_rate": 4.650170566236892e-05, + "loss": 1.4539, + "step": 1150 + }, + { + "epoch": 0.34, + "learning_rate": 4.6442093237688756e-05, + "loss": 1.4527, + "step": 1160 + }, + { + "epoch": 0.35, + "learning_rate": 4.6382016066652556e-05, + "loss": 1.4406, + "step": 1170 + }, + { + "epoch": 0.35, + "learning_rate": 4.632147545140212e-05, + "loss": 1.4233, + "step": 1180 + }, + { + "epoch": 0.35, + "learning_rate": 4.626047270412419e-05, + "loss": 1.426, + "step": 1190 + }, + { + "epoch": 0.36, + "learning_rate": 4.619900914702198e-05, + "loss": 1.4577, + "step": 1200 + }, + { + "epoch": 0.36, + "learning_rate": 4.613708611228652e-05, + "loss": 1.4313, + "step": 1210 + }, + { + "epoch": 0.36, + "learning_rate": 4.607470494206776e-05, + "loss": 1.4129, + "step": 1220 + }, + { + "epoch": 0.36, + "learning_rate": 4.601186698844554e-05, + "loss": 1.4368, + "step": 1230 + }, + { + "epoch": 0.37, + "learning_rate": 4.594857361340021e-05, + "loss": 1.4342, + "step": 1240 + }, + { + "epoch": 0.37, + "learning_rate": 4.588482618878316e-05, + "loss": 1.4438, + "step": 1250 + }, + { + "epoch": 0.37, + "learning_rate": 4.582062609628709e-05, + "loss": 1.4263, + "step": 1260 + }, + { + "epoch": 0.38, + "learning_rate": 4.575597472741601e-05, + "loss": 1.4379, + "step": 1270 + }, + { + "epoch": 0.38, + "learning_rate": 4.569087348345512e-05, + "loss": 1.4221, + "step": 1280 + }, + { + "epoch": 0.38, + "learning_rate": 4.562532377544046e-05, + "loss": 1.4414, + "step": 1290 + }, + { + "epoch": 0.39, + "learning_rate": 4.5559327024128265e-05, + "loss": 1.4395, + "step": 1300 + }, + { + "epoch": 0.39, + "learning_rate": 4.549288465996421e-05, + "loss": 1.4278, + "step": 1310 + }, + { + "epoch": 0.39, + "learning_rate": 4.542599812305243e-05, + "loss": 1.4344, + "step": 1320 + }, + { + "epoch": 0.39, + "learning_rate": 4.535866886312423e-05, + "loss": 1.4352, + "step": 1330 + }, + { + "epoch": 0.4, + "learning_rate": 4.529089833950675e-05, + "loss": 1.4133, + "step": 1340 + }, + { + "epoch": 0.4, + "learning_rate": 4.5222688021091266e-05, + "loss": 1.4506, + "step": 1350 + }, + { + "epoch": 0.4, + "learning_rate": 4.5154039386301385e-05, + "loss": 1.4295, + "step": 1360 + }, + { + "epoch": 0.41, + "learning_rate": 4.5084953923061016e-05, + "loss": 1.4389, + "step": 1370 + }, + { + "epoch": 0.41, + "learning_rate": 4.5015433128762065e-05, + "loss": 1.4247, + "step": 1380 + }, + { + "epoch": 0.41, + "learning_rate": 4.494547851023205e-05, + "loss": 1.4347, + "step": 1390 + }, + { + "epoch": 0.41, + "learning_rate": 4.487509158370139e-05, + "loss": 1.4133, + "step": 1400 + }, + { + "epoch": 0.42, + "learning_rate": 4.480427387477056e-05, + "loss": 1.4296, + "step": 1410 + }, + { + "epoch": 0.42, + "learning_rate": 4.473302691837702e-05, + "loss": 1.4353, + "step": 1420 + }, + { + "epoch": 0.42, + "learning_rate": 4.466135225876194e-05, + "loss": 1.4377, + "step": 1430 + }, + { + "epoch": 0.43, + "learning_rate": 4.458925144943676e-05, + "loss": 1.4168, + "step": 1440 + }, + { + "epoch": 0.43, + "learning_rate": 4.451672605314948e-05, + "loss": 1.4334, + "step": 1450 + }, + { + "epoch": 0.43, + "learning_rate": 4.444377764185082e-05, + "loss": 1.44, + "step": 1460 + }, + { + "epoch": 0.44, + "learning_rate": 4.43704077966601e-05, + "loss": 1.4375, + "step": 1470 + }, + { + "epoch": 0.44, + "learning_rate": 4.4296618107831036e-05, + "loss": 1.447, + "step": 1480 + }, + { + "epoch": 0.44, + "learning_rate": 4.422241017471722e-05, + "loss": 1.4151, + "step": 1490 + }, + { + "epoch": 0.44, + "learning_rate": 4.414778560573749e-05, + "loss": 1.4388, + "step": 1500 + }, + { + "epoch": 0.45, + "learning_rate": 4.4072746018341036e-05, + "loss": 1.4228, + "step": 1510 + }, + { + "epoch": 0.45, + "learning_rate": 4.399729303897238e-05, + "loss": 1.4104, + "step": 1520 + }, + { + "epoch": 0.45, + "learning_rate": 4.392142830303608e-05, + "loss": 1.4441, + "step": 1530 + }, + { + "epoch": 0.46, + "learning_rate": 4.384515345486131e-05, + "loss": 1.4282, + "step": 1540 + }, + { + "epoch": 0.46, + "learning_rate": 4.376847014766623e-05, + "loss": 1.4271, + "step": 1550 + }, + { + "epoch": 0.46, + "learning_rate": 4.369138004352212e-05, + "loss": 1.4223, + "step": 1560 + }, + { + "epoch": 0.47, + "learning_rate": 4.3613884813317406e-05, + "loss": 1.425, + "step": 1570 + }, + { + "epoch": 0.47, + "learning_rate": 4.3535986136721377e-05, + "loss": 1.4392, + "step": 1580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3457685702147834e-05, + "loss": 1.4097, + "step": 1590 + }, + { + "epoch": 0.47, + "learning_rate": 4.3378985206718484e-05, + "loss": 1.4405, + "step": 1600 + }, + { + "epoch": 0.48, + "learning_rate": 4.329988635622611e-05, + "loss": 1.4311, + "step": 1610 + }, + { + "epoch": 0.48, + "learning_rate": 4.322039086509769e-05, + "loss": 1.4358, + "step": 1620 + }, + { + "epoch": 0.48, + "learning_rate": 4.3140500456357145e-05, + "loss": 1.4114, + "step": 1630 + }, + { + "epoch": 0.49, + "learning_rate": 4.306021686158805e-05, + "loss": 1.4165, + "step": 1640 + }, + { + "epoch": 0.49, + "learning_rate": 4.297954182089609e-05, + "loss": 1.4309, + "step": 1650 + }, + { + "epoch": 0.49, + "learning_rate": 4.289847708287129e-05, + "loss": 1.4215, + "step": 1660 + }, + { + "epoch": 0.49, + "learning_rate": 4.2817024404550246e-05, + "loss": 1.4124, + "step": 1670 + }, + { + "epoch": 0.5, + "learning_rate": 4.2735185551377895e-05, + "loss": 1.4001, + "step": 1680 + }, + { + "epoch": 0.5, + "learning_rate": 4.265296229716935e-05, + "loss": 1.4302, + "step": 1690 + }, + { + "epoch": 0.5, + "learning_rate": 4.25703564240714e-05, + "loss": 1.4211, + "step": 1700 + }, + { + "epoch": 0.51, + "learning_rate": 4.2487369722523906e-05, + "loss": 1.4423, + "step": 1710 + }, + { + "epoch": 0.51, + "learning_rate": 4.240400399122101e-05, + "loss": 1.4299, + "step": 1720 + }, + { + "epoch": 0.51, + "learning_rate": 4.232026103707209e-05, + "loss": 1.4214, + "step": 1730 + }, + { + "epoch": 0.52, + "learning_rate": 4.223614267516268e-05, + "loss": 1.4348, + "step": 1740 + }, + { + "epoch": 0.52, + "learning_rate": 4.215165072871505e-05, + "loss": 1.4315, + "step": 1750 + }, + { + "epoch": 0.52, + "learning_rate": 4.206678702904874e-05, + "loss": 1.4098, + "step": 1760 + }, + { + "epoch": 0.52, + "learning_rate": 4.198155341554084e-05, + "loss": 1.4242, + "step": 1770 + }, + { + "epoch": 0.53, + "learning_rate": 4.1895951735586145e-05, + "loss": 1.4272, + "step": 1780 + }, + { + "epoch": 0.53, + "learning_rate": 4.1809983844557085e-05, + "loss": 1.4452, + "step": 1790 + }, + { + "epoch": 0.53, + "learning_rate": 4.172365160576355e-05, + "loss": 1.431, + "step": 1800 + }, + { + "epoch": 0.54, + "learning_rate": 4.163695689041245e-05, + "loss": 1.4389, + "step": 1810 + }, + { + "epoch": 0.54, + "learning_rate": 4.154990157756722e-05, + "loss": 1.413, + "step": 1820 + }, + { + "epoch": 0.54, + "learning_rate": 4.1462487554107036e-05, + "loss": 1.3893, + "step": 1830 + }, + { + "epoch": 0.55, + "learning_rate": 4.137471671468596e-05, + "loss": 1.4052, + "step": 1840 + }, + { + "epoch": 0.55, + "learning_rate": 4.128659096169183e-05, + "loss": 1.4173, + "step": 1850 + }, + { + "epoch": 0.55, + "learning_rate": 4.1198112205205096e-05, + "loss": 1.4012, + "step": 1860 + }, + { + "epoch": 0.55, + "learning_rate": 4.110928236295734e-05, + "loss": 1.4119, + "step": 1870 + }, + { + "epoch": 0.56, + "learning_rate": 4.102010336028975e-05, + "loss": 1.4111, + "step": 1880 + }, + { + "epoch": 0.56, + "learning_rate": 4.0930577130111424e-05, + "loss": 1.4156, + "step": 1890 + }, + { + "epoch": 0.56, + "learning_rate": 4.084070561285739e-05, + "loss": 1.4419, + "step": 1900 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750490756446624e-05, + "loss": 1.4121, + "step": 1910 + }, + { + "epoch": 0.57, + "learning_rate": 4.0659934516239795e-05, + "loss": 1.4204, + "step": 1920 + }, + { + "epoch": 0.57, + "learning_rate": 4.056903885499689e-05, + "loss": 1.4032, + "step": 1930 + }, + { + "epoch": 0.57, + "learning_rate": 4.047780574283466e-05, + "loss": 1.4207, + "step": 1940 + }, + { + "epoch": 0.58, + "learning_rate": 4.038623715718397e-05, + "loss": 1.4095, + "step": 1950 + }, + { + "epoch": 0.58, + "learning_rate": 4.029433508274686e-05, + "loss": 1.4228, + "step": 1960 + }, + { + "epoch": 0.58, + "learning_rate": 4.0202101511453586e-05, + "loss": 1.4141, + "step": 1970 + }, + { + "epoch": 0.59, + "learning_rate": 4.010953844241943e-05, + "loss": 1.4323, + "step": 1980 + }, + { + "epoch": 0.59, + "learning_rate": 4.001664788190135e-05, + "loss": 1.4087, + "step": 1990 + }, + { + "epoch": 0.59, + "learning_rate": 3.992343184325453e-05, + "loss": 1.4186, + "step": 2000 + }, + { + "epoch": 0.6, + "learning_rate": 3.982989234688873e-05, + "loss": 1.4264, + "step": 2010 + }, + { + "epoch": 0.6, + "learning_rate": 3.973603142022448e-05, + "loss": 1.4417, + "step": 2020 + }, + { + "epoch": 0.6, + "learning_rate": 3.964185109764915e-05, + "loss": 1.4075, + "step": 2030 + }, + { + "epoch": 0.6, + "learning_rate": 3.954735342047285e-05, + "loss": 1.4143, + "step": 2040 + }, + { + "epoch": 0.61, + "learning_rate": 3.945254043688419e-05, + "loss": 1.4176, + "step": 2050 + }, + { + "epoch": 0.61, + "learning_rate": 3.935741420190587e-05, + "loss": 1.4214, + "step": 2060 + }, + { + "epoch": 0.61, + "learning_rate": 3.926197677735018e-05, + "loss": 1.4256, + "step": 2070 + }, + { + "epoch": 0.62, + "learning_rate": 3.9166230231774276e-05, + "loss": 1.4075, + "step": 2080 + }, + { + "epoch": 0.62, + "learning_rate": 3.9070176640435335e-05, + "loss": 1.3887, + "step": 2090 + }, + { + "epoch": 0.62, + "learning_rate": 3.897381808524562e-05, + "loss": 1.4225, + "step": 2100 + }, + { + "epoch": 0.63, + "learning_rate": 3.887715665472729e-05, + "loss": 1.4114, + "step": 2110 + }, + { + "epoch": 0.63, + "learning_rate": 3.8780194443967226e-05, + "loss": 1.4316, + "step": 2120 + }, + { + "epoch": 0.63, + "learning_rate": 3.8682933554571524e-05, + "loss": 1.4168, + "step": 2130 + }, + { + "epoch": 0.63, + "learning_rate": 3.858537609461999e-05, + "loss": 1.4237, + "step": 2140 + }, + { + "epoch": 0.64, + "learning_rate": 3.8487524178620464e-05, + "loss": 1.4373, + "step": 2150 + }, + { + "epoch": 0.64, + "learning_rate": 3.838937992746295e-05, + "loss": 1.4089, + "step": 2160 + }, + { + "epoch": 0.64, + "learning_rate": 3.8290945468373684e-05, + "loss": 1.4319, + "step": 2170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8192222934869e-05, + "loss": 1.4035, + "step": 2180 + }, + { + "epoch": 0.65, + "learning_rate": 3.809321446670909e-05, + "loss": 1.4161, + "step": 2190 + }, + { + "epoch": 0.65, + "learning_rate": 3.799392220985164e-05, + "loss": 1.4136, + "step": 2200 + }, + { + "epoch": 0.65, + "learning_rate": 3.789434831640533e-05, + "loss": 1.4188, + "step": 2210 + }, + { + "epoch": 0.66, + "learning_rate": 3.779449494458312e-05, + "loss": 1.4203, + "step": 2220 + }, + { + "epoch": 0.66, + "learning_rate": 3.769436425865557e-05, + "loss": 1.4263, + "step": 2230 + }, + { + "epoch": 0.66, + "learning_rate": 3.759395842890384e-05, + "loss": 1.4295, + "step": 2240 + }, + { + "epoch": 0.67, + "learning_rate": 3.749327963157274e-05, + "loss": 1.4144, + "step": 2250 + }, + { + "epoch": 0.67, + "learning_rate": 3.739233004882346e-05, + "loss": 1.4162, + "step": 2260 + }, + { + "epoch": 0.67, + "learning_rate": 3.729111186868635e-05, + "loss": 1.4099, + "step": 2270 + }, + { + "epoch": 0.68, + "learning_rate": 3.718962728501348e-05, + "loss": 1.3878, + "step": 2280 + }, + { + "epoch": 0.68, + "learning_rate": 3.708787849743106e-05, + "loss": 1.4399, + "step": 2290 + }, + { + "epoch": 0.68, + "learning_rate": 3.69858677112918e-05, + "loss": 1.4249, + "step": 2300 + }, + { + "epoch": 0.68, + "learning_rate": 3.688359713762707e-05, + "loss": 1.3925, + "step": 2310 + }, + { + "epoch": 0.69, + "learning_rate": 3.6781068993099034e-05, + "loss": 1.4036, + "step": 2320 + }, + { + "epoch": 0.69, + "learning_rate": 3.667828549995255e-05, + "loss": 1.3986, + "step": 2330 + }, + { + "epoch": 0.69, + "learning_rate": 3.657524888596703e-05, + "loss": 1.4298, + "step": 2340 + }, + { + "epoch": 0.7, + "learning_rate": 3.6471961384408155e-05, + "loss": 1.4016, + "step": 2350 + }, + { + "epoch": 0.7, + "learning_rate": 3.636842523397945e-05, + "loss": 1.3992, + "step": 2360 + }, + { + "epoch": 0.7, + "learning_rate": 3.626464267877381e-05, + "loss": 1.4441, + "step": 2370 + }, + { + "epoch": 0.71, + "learning_rate": 3.616061596822478e-05, + "loss": 1.3967, + "step": 2380 + }, + { + "epoch": 0.71, + "learning_rate": 3.6056347357057893e-05, + "loss": 1.4252, + "step": 2390 + }, + { + "epoch": 0.71, + "learning_rate": 3.595183910524173e-05, + "loss": 1.4209, + "step": 2400 + }, + { + "epoch": 0.71, + "learning_rate": 3.5847093477938956e-05, + "loss": 1.4133, + "step": 2410 + }, + { + "epoch": 0.72, + "learning_rate": 3.5742112745457235e-05, + "loss": 1.4313, + "step": 2420 + }, + { + "epoch": 0.72, + "learning_rate": 3.563689918320002e-05, + "loss": 1.4275, + "step": 2430 + }, + { + "epoch": 0.72, + "learning_rate": 3.5531455071617226e-05, + "loss": 1.421, + "step": 2440 + }, + { + "epoch": 0.73, + "learning_rate": 3.542578269615579e-05, + "loss": 1.4402, + "step": 2450 + }, + { + "epoch": 0.73, + "learning_rate": 3.5319884347210186e-05, + "loss": 1.4176, + "step": 2460 + }, + { + "epoch": 0.73, + "learning_rate": 3.521376232007271e-05, + "loss": 1.4117, + "step": 2470 + }, + { + "epoch": 0.73, + "learning_rate": 3.5107418914883794e-05, + "loss": 1.41, + "step": 2480 + }, + { + "epoch": 0.74, + "learning_rate": 3.500085643658211e-05, + "loss": 1.4313, + "step": 2490 + }, + { + "epoch": 0.74, + "learning_rate": 3.489407719485464e-05, + "loss": 1.4035, + "step": 2500 + }, + { + "epoch": 0.74, + "learning_rate": 3.4787083504086605e-05, + "loss": 1.4057, + "step": 2510 + }, + { + "epoch": 0.75, + "learning_rate": 3.467987768331127e-05, + "loss": 1.4125, + "step": 2520 + }, + { + "epoch": 0.75, + "learning_rate": 3.457246205615974e-05, + "loss": 1.4056, + "step": 2530 + }, + { + "epoch": 0.75, + "learning_rate": 3.446483895081054e-05, + "loss": 1.4082, + "step": 2540 + }, + { + "epoch": 0.76, + "learning_rate": 3.4357010699939215e-05, + "loss": 1.3915, + "step": 2550 + }, + { + "epoch": 0.76, + "learning_rate": 3.424897964066769e-05, + "loss": 1.4012, + "step": 2560 + }, + { + "epoch": 0.76, + "learning_rate": 3.4140748114513685e-05, + "loss": 1.4251, + "step": 2570 + }, + { + "epoch": 0.76, + "learning_rate": 3.403231846733994e-05, + "loss": 1.4013, + "step": 2580 + }, + { + "epoch": 0.77, + "learning_rate": 3.392369304930334e-05, + "loss": 1.4076, + "step": 2590 + }, + { + "epoch": 0.77, + "learning_rate": 3.3814874214804034e-05, + "loss": 1.3978, + "step": 2600 + }, + { + "epoch": 0.77, + "learning_rate": 3.3705864322434354e-05, + "loss": 1.408, + "step": 2610 + }, + { + "epoch": 0.78, + "learning_rate": 3.359666573492772e-05, + "loss": 1.3888, + "step": 2620 + }, + { + "epoch": 0.78, + "learning_rate": 3.3487280819107415e-05, + "loss": 1.4052, + "step": 2630 + }, + { + "epoch": 0.78, + "learning_rate": 3.33777119458353e-05, + "loss": 1.4286, + "step": 2640 + }, + { + "epoch": 0.79, + "learning_rate": 3.326796148996042e-05, + "loss": 1.4241, + "step": 2650 + }, + { + "epoch": 0.79, + "learning_rate": 3.315803183026753e-05, + "loss": 1.4049, + "step": 2660 + }, + { + "epoch": 0.79, + "learning_rate": 3.304792534942553e-05, + "loss": 1.3826, + "step": 2670 + }, + { + "epoch": 0.79, + "learning_rate": 3.293764443393582e-05, + "loss": 1.413, + "step": 2680 + }, + { + "epoch": 0.8, + "learning_rate": 3.2827191474080605e-05, + "loss": 1.4161, + "step": 2690 + }, + { + "epoch": 0.8, + "learning_rate": 3.2716568863871044e-05, + "loss": 1.382, + "step": 2700 + }, + { + "epoch": 0.8, + "learning_rate": 3.260577900099539e-05, + "loss": 1.381, + "step": 2710 + }, + { + "epoch": 0.81, + "learning_rate": 3.2494824286767e-05, + "loss": 1.396, + "step": 2720 + }, + { + "epoch": 0.81, + "learning_rate": 3.2383707126072315e-05, + "loss": 1.3923, + "step": 2730 + }, + { + "epoch": 0.81, + "learning_rate": 3.2272429927318707e-05, + "loss": 1.4044, + "step": 2740 + }, + { + "epoch": 0.81, + "learning_rate": 3.21609951023823e-05, + "loss": 1.4073, + "step": 2750 + }, + { + "epoch": 0.82, + "learning_rate": 3.204940506655568e-05, + "loss": 1.4178, + "step": 2760 + }, + { + "epoch": 0.82, + "learning_rate": 3.1937662238495544e-05, + "loss": 1.4179, + "step": 2770 + }, + { + "epoch": 0.82, + "learning_rate": 3.1825769040170285e-05, + "loss": 1.4003, + "step": 2780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1713727896807505e-05, + "loss": 1.4176, + "step": 2790 + }, + { + "epoch": 0.83, + "learning_rate": 3.160154123684143e-05, + "loss": 1.4179, + "step": 2800 + }, + { + "epoch": 0.83, + "learning_rate": 3.1489211491860276e-05, + "loss": 1.4098, + "step": 2810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1376741096553576e-05, + "loss": 1.4087, + "step": 2820 + }, + { + "epoch": 0.84, + "learning_rate": 3.126413248865935e-05, + "loss": 1.3971, + "step": 2830 + }, + { + "epoch": 0.84, + "learning_rate": 3.115138810891134e-05, + "loss": 1.3915, + "step": 2840 + }, + { + "epoch": 0.84, + "learning_rate": 3.103851040098607e-05, + "loss": 1.4041, + "step": 2850 + }, + { + "epoch": 0.85, + "learning_rate": 3.0925501811449855e-05, + "loss": 1.4129, + "step": 2860 + }, + { + "epoch": 0.85, + "learning_rate": 3.081236478970583e-05, + "loss": 1.3948, + "step": 2870 + }, + { + "epoch": 0.85, + "learning_rate": 3.069910178794082e-05, + "loss": 1.4116, + "step": 2880 + }, + { + "epoch": 0.86, + "learning_rate": 3.0585715261072206e-05, + "loss": 1.4029, + "step": 2890 + }, + { + "epoch": 0.86, + "learning_rate": 3.04722076666947e-05, + "loss": 1.399, + "step": 2900 + }, + { + "epoch": 0.86, + "learning_rate": 3.0358581465027125e-05, + "loss": 1.4061, + "step": 2910 + }, + { + "epoch": 0.87, + "learning_rate": 3.024483911885901e-05, + "loss": 1.4152, + "step": 2920 + }, + { + "epoch": 0.87, + "learning_rate": 3.013098309349729e-05, + "loss": 1.4257, + "step": 2930 + }, + { + "epoch": 0.87, + "learning_rate": 3.0017015856712814e-05, + "loss": 1.417, + "step": 2940 + }, + { + "epoch": 0.87, + "learning_rate": 2.9902939878686915e-05, + "loss": 1.3952, + "step": 2950 + }, + { + "epoch": 0.88, + "learning_rate": 2.978875763195779e-05, + "loss": 1.4252, + "step": 2960 + }, + { + "epoch": 0.88, + "learning_rate": 2.9674471591367005e-05, + "loss": 1.3982, + "step": 2970 + }, + { + "epoch": 0.88, + "learning_rate": 2.9560084234005765e-05, + "loss": 1.3948, + "step": 2980 + }, + { + "epoch": 0.89, + "learning_rate": 2.944559803916128e-05, + "loss": 1.4127, + "step": 2990 + }, + { + "epoch": 0.89, + "learning_rate": 2.9331015488263024e-05, + "loss": 1.4239, + "step": 3000 + }, + { + "epoch": 0.89, + "learning_rate": 2.9216339064828914e-05, + "loss": 1.3889, + "step": 3010 + }, + { + "epoch": 0.89, + "learning_rate": 2.910157125441152e-05, + "loss": 1.403, + "step": 3020 + }, + { + "epoch": 0.9, + "learning_rate": 2.898671454454418e-05, + "loss": 1.4106, + "step": 3030 + }, + { + "epoch": 0.9, + "learning_rate": 2.8871771424687078e-05, + "loss": 1.4123, + "step": 3040 + }, + { + "epoch": 0.9, + "learning_rate": 2.8756744386173284e-05, + "loss": 1.4137, + "step": 3050 + }, + { + "epoch": 0.91, + "learning_rate": 2.8641635922154774e-05, + "loss": 1.4009, + "step": 3060 + }, + { + "epoch": 0.91, + "learning_rate": 2.8526448527548372e-05, + "loss": 1.4159, + "step": 3070 + }, + { + "epoch": 0.91, + "learning_rate": 2.8411184698981684e-05, + "loss": 1.4071, + "step": 3080 + }, + { + "epoch": 0.92, + "learning_rate": 2.829584693473899e-05, + "loss": 1.41, + "step": 3090 + }, + { + "epoch": 0.92, + "learning_rate": 2.8180437734707064e-05, + "loss": 1.4038, + "step": 3100 + }, + { + "epoch": 0.92, + "learning_rate": 2.8064959600321043e-05, + "loss": 1.4069, + "step": 3110 + }, + { + "epoch": 0.92, + "learning_rate": 2.7949415034510163e-05, + "loss": 1.4096, + "step": 3120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7833806541643544e-05, + "loss": 1.3821, + "step": 3130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7718136627475865e-05, + "loss": 1.3886, + "step": 3140 + }, + { + "epoch": 0.93, + "learning_rate": 2.76024077990931e-05, + "loss": 1.3923, + "step": 3150 + }, + { + "epoch": 0.94, + "learning_rate": 2.748662256485816e-05, + "loss": 1.4072, + "step": 3160 + }, + { + "epoch": 0.94, + "learning_rate": 2.7370783434356512e-05, + "loss": 1.4126, + "step": 3170 + }, + { + "epoch": 0.94, + "learning_rate": 2.7254892918341802e-05, + "loss": 1.4238, + "step": 3180 + }, + { + "epoch": 0.95, + "learning_rate": 2.713895352868144e-05, + "loss": 1.4183, + "step": 3190 + }, + { + "epoch": 0.95, + "learning_rate": 2.702296777830212e-05, + "loss": 1.4056, + "step": 3200 + }, + { + "epoch": 0.95, + "learning_rate": 2.6906938181135423e-05, + "loss": 1.4096, + "step": 3210 + }, + { + "epoch": 0.95, + "learning_rate": 2.6790867252063247e-05, + "loss": 1.4018, + "step": 3220 + }, + { + "epoch": 0.96, + "learning_rate": 2.6674757506863357e-05, + "loss": 1.3922, + "step": 3230 + }, + { + "epoch": 0.96, + "learning_rate": 2.655861146215483e-05, + "loss": 1.4054, + "step": 3240 + }, + { + "epoch": 0.96, + "learning_rate": 2.6442431635343528e-05, + "loss": 1.3914, + "step": 3250 + }, + { + "epoch": 0.97, + "learning_rate": 2.6326220544567514e-05, + "loss": 1.3851, + "step": 3260 + }, + { + "epoch": 0.97, + "learning_rate": 2.620998070864248e-05, + "loss": 1.4102, + "step": 3270 + }, + { + "epoch": 0.97, + "learning_rate": 2.6093714647007156e-05, + "loss": 1.4069, + "step": 3280 + }, + { + "epoch": 0.97, + "learning_rate": 2.5977424879668705e-05, + "loss": 1.3919, + "step": 3290 + }, + { + "epoch": 0.98, + "learning_rate": 2.5861113927148096e-05, + "loss": 1.4073, + "step": 3300 + }, + { + "epoch": 0.98, + "learning_rate": 2.5744784310425467e-05, + "loss": 1.4025, + "step": 3310 + }, + { + "epoch": 0.98, + "learning_rate": 2.562843855088551e-05, + "loss": 1.3805, + "step": 3320 + }, + { + "epoch": 0.99, + "learning_rate": 2.5512079170262793e-05, + "loss": 1.4032, + "step": 3330 + }, + { + "epoch": 0.99, + "learning_rate": 2.5395708690587117e-05, + "loss": 1.4232, + "step": 3340 + }, + { + "epoch": 0.99, + "learning_rate": 2.527932963412885e-05, + "loss": 1.3897, + "step": 3350 + }, + { + "epoch": 1.0, + "learning_rate": 2.5162944523344256e-05, + "loss": 1.4008, + "step": 3360 + }, + { + "epoch": 1.0, + "learning_rate": 2.5046555880820826e-05, + "loss": 1.3936, + "step": 3370 + }, + { + "epoch": 1.0, + "learning_rate": 2.4930166229222597e-05, + "loss": 1.394, + "step": 3380 + }, + { + "epoch": 1.0, + "learning_rate": 2.481377809123547e-05, + "loss": 1.3903, + "step": 3390 + }, + { + "epoch": 1.01, + "learning_rate": 2.469739398951256e-05, + "loss": 1.3869, + "step": 3400 + }, + { + "epoch": 1.01, + "learning_rate": 2.458101644661947e-05, + "loss": 1.429, + "step": 3410 + }, + { + "epoch": 1.01, + "learning_rate": 2.4464647984979667e-05, + "loss": 1.3987, + "step": 3420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4348291126819783e-05, + "loss": 1.38, + "step": 3430 + }, + { + "epoch": 1.02, + "learning_rate": 2.4231948394114936e-05, + "loss": 1.3906, + "step": 3440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4115622308534096e-05, + "loss": 1.3931, + "step": 3450 + }, + { + "epoch": 1.03, + "learning_rate": 2.399931539138541e-05, + "loss": 1.4135, + "step": 3460 + }, + { + "epoch": 1.03, + "learning_rate": 2.388303016356156e-05, + "loss": 1.3952, + "step": 3470 + }, + { + "epoch": 1.03, + "learning_rate": 2.3766769145485125e-05, + "loss": 1.3972, + "step": 3480 + }, + { + "epoch": 1.03, + "learning_rate": 2.3650534857053943e-05, + "loss": 1.3937, + "step": 3490 + }, + { + "epoch": 1.04, + "learning_rate": 2.3534329817586513e-05, + "loss": 1.3936, + "step": 3500 + }, + { + "epoch": 1.04, + "learning_rate": 2.3418156545767365e-05, + "loss": 1.397, + "step": 3510 + }, + { + "epoch": 1.04, + "learning_rate": 2.3302017559592494e-05, + "loss": 1.3849, + "step": 3520 + }, + { + "epoch": 1.05, + "learning_rate": 2.318591537631476e-05, + "loss": 1.4118, + "step": 3530 + }, + { + "epoch": 1.05, + "learning_rate": 2.3069852512389335e-05, + "loss": 1.414, + "step": 3540 + }, + { + "epoch": 1.05, + "learning_rate": 2.2953831483419184e-05, + "loss": 1.4088, + "step": 3550 + }, + { + "epoch": 1.05, + "learning_rate": 2.2837854804100504e-05, + "loss": 1.3773, + "step": 3560 + }, + { + "epoch": 1.06, + "learning_rate": 2.272192498816825e-05, + "loss": 1.3977, + "step": 3570 + }, + { + "epoch": 1.06, + "learning_rate": 2.260604454834162e-05, + "loss": 1.3591, + "step": 3580 + }, + { + "epoch": 1.06, + "learning_rate": 2.2490215996269617e-05, + "loss": 1.4023, + "step": 3590 + }, + { + "epoch": 1.07, + "learning_rate": 2.237444184247661e-05, + "loss": 1.3873, + "step": 3600 + }, + { + "epoch": 1.07, + "learning_rate": 2.2258724596307915e-05, + "loss": 1.3826, + "step": 3610 + }, + { + "epoch": 1.07, + "learning_rate": 2.214306676587539e-05, + "loss": 1.3732, + "step": 3620 + }, + { + "epoch": 1.08, + "learning_rate": 2.2027470858003098e-05, + "loss": 1.3988, + "step": 3630 + }, + { + "epoch": 1.08, + "learning_rate": 2.1911939378172956e-05, + "loss": 1.4036, + "step": 3640 + }, + { + "epoch": 1.08, + "learning_rate": 2.1796474830470447e-05, + "loss": 1.4236, + "step": 3650 + }, + { + "epoch": 1.08, + "learning_rate": 2.1681079717530328e-05, + "loss": 1.4032, + "step": 3660 + }, + { + "epoch": 1.09, + "learning_rate": 2.156575654048239e-05, + "loss": 1.39, + "step": 3670 + }, + { + "epoch": 1.09, + "learning_rate": 2.145050779889725e-05, + "loss": 1.3757, + "step": 3680 + }, + { + "epoch": 1.09, + "learning_rate": 2.1335335990732186e-05, + "loss": 1.3934, + "step": 3690 + }, + { + "epoch": 1.1, + "learning_rate": 2.1220243612276964e-05, + "loss": 1.3979, + "step": 3700 + }, + { + "epoch": 1.1, + "learning_rate": 2.110523315809978e-05, + "loss": 1.4181, + "step": 3710 + }, + { + "epoch": 1.1, + "learning_rate": 2.0990307120993134e-05, + "loss": 1.406, + "step": 3720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0875467991919854e-05, + "loss": 1.4036, + "step": 3730 + }, + { + "epoch": 1.11, + "learning_rate": 2.076071825995906e-05, + "loss": 1.4095, + "step": 3740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0646060412252246e-05, + "loss": 1.4048, + "step": 3750 + }, + { + "epoch": 1.11, + "learning_rate": 2.0531496933949363e-05, + "loss": 1.3874, + "step": 3760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417030308154953e-05, + "loss": 1.3793, + "step": 3770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0302663015874322e-05, + "loss": 1.4152, + "step": 3780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0188397535959785e-05, + "loss": 1.3738, + "step": 3790 + }, + { + "epoch": 1.13, + "learning_rate": 2.007423634505692e-05, + "loss": 1.4033, + "step": 3800 + }, + { + "epoch": 1.13, + "learning_rate": 1.9960181917550897e-05, + "loss": 1.3753, + "step": 3810 + }, + { + "epoch": 1.13, + "learning_rate": 1.9846236725512835e-05, + "loss": 1.3791, + "step": 3820 + }, + { + "epoch": 1.13, + "learning_rate": 1.973240323864624e-05, + "loss": 1.3837, + "step": 3830 + }, + { + "epoch": 1.14, + "learning_rate": 1.9618683924233467e-05, + "loss": 1.3945, + "step": 3840 + }, + { + "epoch": 1.14, + "learning_rate": 1.9505081247082237e-05, + "loss": 1.3804, + "step": 3850 + }, + { + "epoch": 1.14, + "learning_rate": 1.9391597669472213e-05, + "loss": 1.3964, + "step": 3860 + }, + { + "epoch": 1.15, + "learning_rate": 1.927823565110165e-05, + "loss": 1.3983, + "step": 3870 + }, + { + "epoch": 1.15, + "learning_rate": 1.9164997649034058e-05, + "loss": 1.4169, + "step": 3880 + }, + { + "epoch": 1.15, + "learning_rate": 1.9051886117644963e-05, + "loss": 1.4101, + "step": 3890 + }, + { + "epoch": 1.16, + "learning_rate": 1.89389035085687e-05, + "loss": 1.3823, + "step": 3900 + }, + { + "epoch": 1.16, + "learning_rate": 1.8826052270645276e-05, + "loss": 1.3827, + "step": 3910 + }, + { + "epoch": 1.16, + "learning_rate": 1.8713334849867315e-05, + "loss": 1.4035, + "step": 3920 + }, + { + "epoch": 1.16, + "learning_rate": 1.8600753689327e-05, + "loss": 1.4081, + "step": 3930 + }, + { + "epoch": 1.17, + "learning_rate": 1.8488311229163152e-05, + "loss": 1.3919, + "step": 3940 + }, + { + "epoch": 1.17, + "learning_rate": 1.8376009906508338e-05, + "loss": 1.3854, + "step": 3950 + }, + { + "epoch": 1.17, + "learning_rate": 1.826385215543603e-05, + "loss": 1.3924, + "step": 3960 + }, + { + "epoch": 1.18, + "learning_rate": 1.8151840406907873e-05, + "loss": 1.3851, + "step": 3970 + }, + { + "epoch": 1.18, + "learning_rate": 1.8039977088720972e-05, + "loss": 1.3707, + "step": 3980 + }, + { + "epoch": 1.18, + "learning_rate": 1.7928264625455282e-05, + "loss": 1.3998, + "step": 3990 + }, + { + "epoch": 1.19, + "learning_rate": 1.7816705438421064e-05, + "loss": 1.3931, + "step": 4000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7705301945606384e-05, + "loss": 1.3976, + "step": 4010 + }, + { + "epoch": 1.19, + "learning_rate": 1.7594056561624716e-05, + "loss": 1.3785, + "step": 4020 + }, + { + "epoch": 1.19, + "learning_rate": 1.748297169766262e-05, + "loss": 1.3845, + "step": 4030 + }, + { + "epoch": 1.2, + "learning_rate": 1.7372049761427457e-05, + "loss": 1.3926, + "step": 4040 + }, + { + "epoch": 1.2, + "learning_rate": 1.7261293157095204e-05, + "loss": 1.4075, + "step": 4050 + }, + { + "epoch": 1.2, + "learning_rate": 1.7150704285258375e-05, + "loss": 1.3938, + "step": 4060 + }, + { + "epoch": 1.21, + "learning_rate": 1.7040285542873945e-05, + "loss": 1.3884, + "step": 4070 + }, + { + "epoch": 1.21, + "learning_rate": 1.6930039323211448e-05, + "loss": 1.4066, + "step": 4080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6819968015801048e-05, + "loss": 1.3992, + "step": 4090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6710074006381797e-05, + "loss": 1.4066, + "step": 4100 + }, + { + "epoch": 1.22, + "learning_rate": 1.6600359676849892e-05, + "loss": 1.4076, + "step": 4110 + }, + { + "epoch": 1.22, + "learning_rate": 1.6490827405207062e-05, + "loss": 1.4078, + "step": 4120 + }, + { + "epoch": 1.22, + "learning_rate": 1.638147956550904e-05, + "loss": 1.4026, + "step": 4130 + }, + { + "epoch": 1.23, + "learning_rate": 1.627231852781407e-05, + "loss": 1.3861, + "step": 4140 + }, + { + "epoch": 1.23, + "learning_rate": 1.6163346658131567e-05, + "loss": 1.3915, + "step": 4150 + }, + { + "epoch": 1.23, + "learning_rate": 1.6054566318370832e-05, + "loss": 1.3828, + "step": 4160 + }, + { + "epoch": 1.24, + "learning_rate": 1.5945979866289844e-05, + "loss": 1.3952, + "step": 4170 + }, + { + "epoch": 1.24, + "learning_rate": 1.583758965544417e-05, + "loss": 1.3892, + "step": 4180 + }, + { + "epoch": 1.24, + "learning_rate": 1.5729398035135957e-05, + "loss": 1.3973, + "step": 4190 + }, + { + "epoch": 1.24, + "learning_rate": 1.5621407350362986e-05, + "loss": 1.4225, + "step": 4200 + }, + { + "epoch": 1.25, + "learning_rate": 1.5513619941767886e-05, + "loss": 1.3948, + "step": 4210 + }, + { + "epoch": 1.25, + "learning_rate": 1.540603814558736e-05, + "loss": 1.4074, + "step": 4220 + }, + { + "epoch": 1.25, + "learning_rate": 1.5298664293601574e-05, + "loss": 1.3965, + "step": 4230 + }, + { + "epoch": 1.26, + "learning_rate": 1.5191500713083615e-05, + "loss": 1.3743, + "step": 4240 + }, + { + "epoch": 1.26, + "learning_rate": 1.508454972674904e-05, + "loss": 1.384, + "step": 4250 + }, + { + "epoch": 1.26, + "learning_rate": 1.4977813652705535e-05, + "loss": 1.4018, + "step": 4260 + }, + { + "epoch": 1.27, + "learning_rate": 1.4871294804402675e-05, + "loss": 1.3904, + "step": 4270 + }, + { + "epoch": 1.27, + "learning_rate": 1.4764995490581779e-05, + "loss": 1.3981, + "step": 4280 + }, + { + "epoch": 1.27, + "learning_rate": 1.465891801522587e-05, + "loss": 1.4144, + "step": 4290 + }, + { + "epoch": 1.27, + "learning_rate": 1.4553064677509731e-05, + "loss": 1.4172, + "step": 4300 + }, + { + "epoch": 1.28, + "learning_rate": 1.4447437771750078e-05, + "loss": 1.3873, + "step": 4310 + }, + { + "epoch": 1.28, + "learning_rate": 1.4342039587355832e-05, + "loss": 1.3983, + "step": 4320 + }, + { + "epoch": 1.28, + "learning_rate": 1.423687240877849e-05, + "loss": 1.4007, + "step": 4330 + }, + { + "epoch": 1.29, + "learning_rate": 1.4131938515462639e-05, + "loss": 1.4088, + "step": 4340 + }, + { + "epoch": 1.29, + "learning_rate": 1.4027240181796508e-05, + "loss": 1.3941, + "step": 4350 + }, + { + "epoch": 1.29, + "learning_rate": 1.3922779677062689e-05, + "loss": 1.3975, + "step": 4360 + }, + { + "epoch": 1.29, + "learning_rate": 1.3818559265388964e-05, + "loss": 1.3842, + "step": 4370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3714581205699214e-05, + "loss": 1.4011, + "step": 4380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3610847751664473e-05, + "loss": 1.3881, + "step": 4390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3507361151654067e-05, + "loss": 1.4028, + "step": 4400 + }, + { + "epoch": 1.31, + "learning_rate": 1.340412364868689e-05, + "loss": 1.3973, + "step": 4410 + }, + { + "epoch": 1.31, + "learning_rate": 1.3301137480382786e-05, + "loss": 1.445, + "step": 4420 + }, + { + "epoch": 1.31, + "learning_rate": 1.3198404878914044e-05, + "loss": 1.3957, + "step": 4430 + }, + { + "epoch": 1.32, + "learning_rate": 1.3095928070957037e-05, + "loss": 1.395, + "step": 4440 + }, + { + "epoch": 1.32, + "learning_rate": 1.2993709277643922e-05, + "loss": 1.4157, + "step": 4450 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891750714514545e-05, + "loss": 1.4074, + "step": 4460 + }, + { + "epoch": 1.32, + "learning_rate": 1.2790054591468381e-05, + "loss": 1.3988, + "step": 4470 + }, + { + "epoch": 1.33, + "learning_rate": 1.2688623112716652e-05, + "loss": 1.3914, + "step": 4480 + }, + { + "epoch": 1.33, + "learning_rate": 1.2587458476734559e-05, + "loss": 1.3864, + "step": 4490 + }, + { + "epoch": 1.33, + "learning_rate": 1.248656287621362e-05, + "loss": 1.3934, + "step": 4500 + }, + { + "epoch": 1.34, + "learning_rate": 1.2385938498014138e-05, + "loss": 1.3893, + "step": 4510 + }, + { + "epoch": 1.34, + "learning_rate": 1.2285587523117825e-05, + "loss": 1.3991, + "step": 4520 + }, + { + "epoch": 1.34, + "learning_rate": 1.2185512126580512e-05, + "loss": 1.376, + "step": 4530 + }, + { + "epoch": 1.35, + "learning_rate": 1.2085714477484997e-05, + "loss": 1.3799, + "step": 4540 + }, + { + "epoch": 1.35, + "learning_rate": 1.1986196738894078e-05, + "loss": 1.3738, + "step": 4550 + }, + { + "epoch": 1.35, + "learning_rate": 1.188696106780361e-05, + "loss": 1.3754, + "step": 4560 + }, + { + "epoch": 1.35, + "learning_rate": 1.178800961509578e-05, + "loss": 1.4006, + "step": 4570 + }, + { + "epoch": 1.36, + "learning_rate": 1.1689344525492497e-05, + "loss": 1.4012, + "step": 4580 + }, + { + "epoch": 1.36, + "learning_rate": 1.1590967937508895e-05, + "loss": 1.3973, + "step": 4590 + }, + { + "epoch": 1.36, + "learning_rate": 1.149288198340698e-05, + "loss": 1.3737, + "step": 4600 + }, + { + "epoch": 1.37, + "learning_rate": 1.1395088789149419e-05, + "loss": 1.3998, + "step": 4610 + }, + { + "epoch": 1.37, + "learning_rate": 1.1297590474353464e-05, + "loss": 1.4053, + "step": 4620 + }, + { + "epoch": 1.37, + "learning_rate": 1.1200389152245003e-05, + "loss": 1.4038, + "step": 4630 + }, + { + "epoch": 1.37, + "learning_rate": 1.1103486929612759e-05, + "loss": 1.3968, + "step": 4640 + }, + { + "epoch": 1.38, + "learning_rate": 1.1006885906762626e-05, + "loss": 1.4037, + "step": 4650 + }, + { + "epoch": 1.38, + "learning_rate": 1.0910588177472153e-05, + "loss": 1.3901, + "step": 4660 + }, + { + "epoch": 1.38, + "learning_rate": 1.0814595828945154e-05, + "loss": 1.379, + "step": 4670 + }, + { + "epoch": 1.39, + "learning_rate": 1.0718910941766478e-05, + "loss": 1.3808, + "step": 4680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0623535589856887e-05, + "loss": 1.4105, + "step": 4690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0528471840428142e-05, + "loss": 1.3756, + "step": 4700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0433721753938182e-05, + "loss": 1.3708, + "step": 4710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0339287384046462e-05, + "loss": 1.3924, + "step": 4720 + }, + { + "epoch": 1.4, + "learning_rate": 1.024517077756943e-05, + "loss": 1.3854, + "step": 4730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0151373974436184e-05, + "loss": 1.3908, + "step": 4740 + }, + { + "epoch": 1.41, + "learning_rate": 1.0057899007644245e-05, + "loss": 1.3953, + "step": 4750 + }, + { + "epoch": 1.41, + "learning_rate": 9.964747903215513e-06, + "loss": 1.3933, + "step": 4760 + }, + { + "epoch": 1.41, + "learning_rate": 9.871922680152318e-06, + "loss": 1.3854, + "step": 4770 + }, + { + "epoch": 1.42, + "learning_rate": 9.779425350393685e-06, + "loss": 1.4026, + "step": 4780 + }, + { + "epoch": 1.42, + "learning_rate": 9.687257918771719e-06, + "loss": 1.3958, + "step": 4790 + }, + { + "epoch": 1.42, + "learning_rate": 9.595422382968156e-06, + "loss": 1.3777, + "step": 4800 + }, + { + "epoch": 1.43, + "learning_rate": 9.503920733471052e-06, + "loss": 1.3835, + "step": 4810 + }, + { + "epoch": 1.43, + "learning_rate": 9.412754953531663e-06, + "loss": 1.3768, + "step": 4820 + }, + { + "epoch": 1.43, + "learning_rate": 9.321927019121435e-06, + "loss": 1.3846, + "step": 4830 + }, + { + "epoch": 1.43, + "learning_rate": 9.231438898889184e-06, + "loss": 1.3878, + "step": 4840 + }, + { + "epoch": 1.44, + "learning_rate": 9.141292554118435e-06, + "loss": 1.38, + "step": 4850 + }, + { + "epoch": 1.44, + "learning_rate": 9.051489938684903e-06, + "loss": 1.3841, + "step": 4860 + }, + { + "epoch": 1.44, + "learning_rate": 8.962032999014144e-06, + "loss": 1.4122, + "step": 4870 + }, + { + "epoch": 1.45, + "learning_rate": 8.87292367403937e-06, + "loss": 1.3839, + "step": 4880 + }, + { + "epoch": 1.45, + "learning_rate": 8.784163895159428e-06, + "loss": 1.3932, + "step": 4890 + }, + { + "epoch": 1.45, + "learning_rate": 8.695755586196924e-06, + "loss": 1.4012, + "step": 4900 + }, + { + "epoch": 1.45, + "learning_rate": 8.607700663356543e-06, + "loss": 1.3931, + "step": 4910 + }, + { + "epoch": 1.46, + "learning_rate": 8.520001035183503e-06, + "loss": 1.4003, + "step": 4920 + }, + { + "epoch": 1.46, + "learning_rate": 8.432658602522193e-06, + "loss": 1.4064, + "step": 4930 + }, + { + "epoch": 1.46, + "learning_rate": 8.345675258474969e-06, + "loss": 1.383, + "step": 4940 + }, + { + "epoch": 1.47, + "learning_rate": 8.259052888361132e-06, + "loss": 1.4147, + "step": 4950 + }, + { + "epoch": 1.47, + "learning_rate": 8.172793369676052e-06, + "loss": 1.4064, + "step": 4960 + }, + { + "epoch": 1.47, + "learning_rate": 8.086898572050494e-06, + "loss": 1.3894, + "step": 4970 + }, + { + "epoch": 1.48, + "learning_rate": 8.00137035721007e-06, + "loss": 1.3928, + "step": 4980 + }, + { + "epoch": 1.48, + "learning_rate": 7.916210578934896e-06, + "loss": 1.4049, + "step": 4990 + }, + { + "epoch": 1.48, + "learning_rate": 7.831421083019422e-06, + "loss": 1.402, + "step": 5000 + }, + { + "epoch": 1.48, + "learning_rate": 7.747003707232415e-06, + "loss": 1.4144, + "step": 5010 + }, + { + "epoch": 1.49, + "learning_rate": 7.66296028127713e-06, + "loss": 1.3884, + "step": 5020 + }, + { + "epoch": 1.49, + "learning_rate": 7.579292626751647e-06, + "loss": 1.4116, + "step": 5030 + }, + { + "epoch": 1.49, + "learning_rate": 7.4960025571094025e-06, + "loss": 1.3828, + "step": 5040 + }, + { + "epoch": 1.5, + "learning_rate": 7.413091877619868e-06, + "loss": 1.3821, + "step": 5050 + }, + { + "epoch": 1.5, + "learning_rate": 7.330562385329429e-06, + "loss": 1.4068, + "step": 5060 + }, + { + "epoch": 1.5, + "learning_rate": 7.248415869022434e-06, + "loss": 1.3842, + "step": 5070 + }, + { + "epoch": 1.51, + "learning_rate": 7.16665410918243e-06, + "loss": 1.3937, + "step": 5080 + }, + { + "epoch": 1.51, + "learning_rate": 7.085278877953558e-06, + "loss": 1.4077, + "step": 5090 + }, + { + "epoch": 1.51, + "learning_rate": 7.004291939102148e-06, + "loss": 1.3989, + "step": 5100 + }, + { + "epoch": 1.51, + "learning_rate": 6.923695047978502e-06, + "loss": 1.3727, + "step": 5110 + }, + { + "epoch": 1.52, + "learning_rate": 6.843489951478829e-06, + "loss": 1.3842, + "step": 5120 + }, + { + "epoch": 1.52, + "learning_rate": 6.763678388007394e-06, + "loss": 1.3662, + "step": 5130 + }, + { + "epoch": 1.52, + "learning_rate": 6.684262087438839e-06, + "loss": 1.4092, + "step": 5140 + }, + { + "epoch": 1.53, + "learning_rate": 6.605242771080686e-06, + "loss": 1.399, + "step": 5150 + }, + { + "epoch": 1.53, + "learning_rate": 6.526622151636011e-06, + "loss": 1.3931, + "step": 5160 + }, + { + "epoch": 1.53, + "learning_rate": 6.448401933166351e-06, + "loss": 1.3824, + "step": 5170 + }, + { + "epoch": 1.53, + "learning_rate": 6.370583811054778e-06, + "loss": 1.3764, + "step": 5180 + }, + { + "epoch": 1.54, + "learning_rate": 6.293169471969104e-06, + "loss": 1.3835, + "step": 5190 + }, + { + "epoch": 1.54, + "learning_rate": 6.216160593825363e-06, + "loss": 1.382, + "step": 5200 + }, + { + "epoch": 1.54, + "learning_rate": 6.1395588457514226e-06, + "loss": 1.3983, + "step": 5210 + }, + { + "epoch": 1.55, + "learning_rate": 6.063365888050829e-06, + "loss": 1.3709, + "step": 5220 + }, + { + "epoch": 1.55, + "learning_rate": 5.987583372166794e-06, + "loss": 1.4037, + "step": 5230 + }, + { + "epoch": 1.55, + "learning_rate": 5.912212940646422e-06, + "loss": 1.3955, + "step": 5240 + }, + { + "epoch": 1.56, + "learning_rate": 5.8372562271051e-06, + "loss": 1.384, + "step": 5250 + }, + { + "epoch": 1.56, + "learning_rate": 5.762714856191087e-06, + "loss": 1.3772, + "step": 5260 + }, + { + "epoch": 1.56, + "learning_rate": 5.688590443550304e-06, + "loss": 1.3818, + "step": 5270 + }, + { + "epoch": 1.56, + "learning_rate": 5.61488459579132e-06, + "loss": 1.4039, + "step": 5280 + }, + { + "epoch": 1.57, + "learning_rate": 5.541598910450518e-06, + "loss": 1.3935, + "step": 5290 + }, + { + "epoch": 1.57, + "learning_rate": 5.4687349759574845e-06, + "loss": 1.402, + "step": 5300 + }, + { + "epoch": 1.57, + "learning_rate": 5.396294371600569e-06, + "loss": 1.3774, + "step": 5310 + }, + { + "epoch": 1.58, + "learning_rate": 5.3242786674926545e-06, + "loss": 1.3936, + "step": 5320 + }, + { + "epoch": 1.58, + "learning_rate": 5.252689424537139e-06, + "loss": 1.3914, + "step": 5330 + }, + { + "epoch": 1.58, + "learning_rate": 5.181528194394081e-06, + "loss": 1.3931, + "step": 5340 + }, + { + "epoch": 1.59, + "learning_rate": 5.11079651944659e-06, + "loss": 1.3854, + "step": 5350 + }, + { + "epoch": 1.59, + "learning_rate": 5.040495932767386e-06, + "loss": 1.4101, + "step": 5360 + }, + { + "epoch": 1.59, + "learning_rate": 4.970627958085574e-06, + "loss": 1.3929, + "step": 5370 + }, + { + "epoch": 1.59, + "learning_rate": 4.901194109753607e-06, + "loss": 1.3826, + "step": 5380 + }, + { + "epoch": 1.6, + "learning_rate": 4.832195892714489e-06, + "loss": 1.3974, + "step": 5390 + }, + { + "epoch": 1.6, + "learning_rate": 4.763634802469124e-06, + "loss": 1.3988, + "step": 5400 + }, + { + "epoch": 1.6, + "learning_rate": 4.6955123250439245e-06, + "loss": 1.3875, + "step": 5410 + }, + { + "epoch": 1.61, + "learning_rate": 4.6278299369585916e-06, + "loss": 1.3881, + "step": 5420 + }, + { + "epoch": 1.61, + "learning_rate": 4.560589105194121e-06, + "loss": 1.3924, + "step": 5430 + }, + { + "epoch": 1.61, + "learning_rate": 4.493791287160998e-06, + "loss": 1.4058, + "step": 5440 + }, + { + "epoch": 1.61, + "learning_rate": 4.4274379306676164e-06, + "loss": 1.3946, + "step": 5450 + }, + { + "epoch": 1.62, + "learning_rate": 4.361530473888889e-06, + "loss": 1.4045, + "step": 5460 + }, + { + "epoch": 1.62, + "learning_rate": 4.296070345335085e-06, + "loss": 1.3817, + "step": 5470 + }, + { + "epoch": 1.62, + "learning_rate": 4.231058963820867e-06, + "loss": 1.3989, + "step": 5480 + }, + { + "epoch": 1.63, + "learning_rate": 4.166497738434527e-06, + "loss": 1.4004, + "step": 5490 + }, + { + "epoch": 1.63, + "learning_rate": 4.102388068507465e-06, + "loss": 1.3905, + "step": 5500 + }, + { + "epoch": 1.63, + "learning_rate": 4.03873134358384e-06, + "loss": 1.3731, + "step": 5510 + }, + { + "epoch": 1.64, + "learning_rate": 3.9755289433904694e-06, + "loss": 1.383, + "step": 5520 + }, + { + "epoch": 1.64, + "learning_rate": 3.912782237806903e-06, + "loss": 1.3853, + "step": 5530 + }, + { + "epoch": 1.64, + "learning_rate": 3.850492586835755e-06, + "loss": 1.4039, + "step": 5540 + }, + { + "epoch": 1.64, + "learning_rate": 3.788661340573213e-06, + "loss": 1.3703, + "step": 5550 + }, + { + "epoch": 1.65, + "learning_rate": 3.7272898391797734e-06, + "loss": 1.388, + "step": 5560 + }, + { + "epoch": 1.65, + "learning_rate": 3.6663794128512038e-06, + "loss": 1.3785, + "step": 5570 + }, + { + "epoch": 1.65, + "learning_rate": 3.6059313817897065e-06, + "loss": 1.3901, + "step": 5580 + }, + { + "epoch": 1.66, + "learning_rate": 3.5459470561753e-06, + "loss": 1.3894, + "step": 5590 + }, + { + "epoch": 1.66, + "learning_rate": 3.4864277361374264e-06, + "loss": 1.3799, + "step": 5600 + }, + { + "epoch": 1.66, + "learning_rate": 3.4273747117267774e-06, + "loss": 1.3798, + "step": 5610 + }, + { + "epoch": 1.67, + "learning_rate": 3.3687892628873175e-06, + "loss": 1.3948, + "step": 5620 + }, + { + "epoch": 1.67, + "learning_rate": 3.310672659428557e-06, + "loss": 1.4071, + "step": 5630 + }, + { + "epoch": 1.67, + "learning_rate": 3.2530261609980183e-06, + "loss": 1.3993, + "step": 5640 + }, + { + "epoch": 1.67, + "learning_rate": 3.195851017053944e-06, + "loss": 1.3885, + "step": 5650 + }, + { + "epoch": 1.68, + "learning_rate": 3.1391484668382073e-06, + "loss": 1.3919, + "step": 5660 + }, + { + "epoch": 1.68, + "learning_rate": 3.0829197393494548e-06, + "loss": 1.3965, + "step": 5670 + }, + { + "epoch": 1.68, + "learning_rate": 3.0271660533164714e-06, + "loss": 1.4135, + "step": 5680 + }, + { + "epoch": 1.69, + "learning_rate": 2.9718886171717613e-06, + "loss": 1.3923, + "step": 5690 + }, + { + "epoch": 1.69, + "learning_rate": 2.9170886290253552e-06, + "loss": 1.3663, + "step": 5700 + }, + { + "epoch": 1.69, + "learning_rate": 2.8627672766388448e-06, + "loss": 1.3772, + "step": 5710 + }, + { + "epoch": 1.69, + "learning_rate": 2.8089257373996424e-06, + "loss": 1.373, + "step": 5720 + }, + { + "epoch": 1.7, + "learning_rate": 2.755565178295447e-06, + "loss": 1.3858, + "step": 5730 + }, + { + "epoch": 1.7, + "learning_rate": 2.7026867558889694e-06, + "loss": 1.3996, + "step": 5740 + }, + { + "epoch": 1.7, + "learning_rate": 2.6502916162928463e-06, + "loss": 1.4194, + "step": 5750 + }, + { + "epoch": 1.71, + "learning_rate": 2.5983808951448196e-06, + "loss": 1.3895, + "step": 5760 + }, + { + "epoch": 1.71, + "learning_rate": 2.5469557175830993e-06, + "loss": 1.376, + "step": 5770 + }, + { + "epoch": 1.71, + "learning_rate": 2.496017198221995e-06, + "loss": 1.3901, + "step": 5780 + }, + { + "epoch": 1.72, + "learning_rate": 2.445566441127742e-06, + "loss": 1.3782, + "step": 5790 + }, + { + "epoch": 1.72, + "learning_rate": 2.3956045397945826e-06, + "loss": 1.3723, + "step": 5800 + }, + { + "epoch": 1.72, + "learning_rate": 2.3461325771210683e-06, + "loss": 1.3705, + "step": 5810 + }, + { + "epoch": 1.72, + "learning_rate": 2.297151625386576e-06, + "loss": 1.4018, + "step": 5820 + }, + { + "epoch": 1.73, + "learning_rate": 2.2486627462280724e-06, + "loss": 1.3811, + "step": 5830 + }, + { + "epoch": 1.73, + "learning_rate": 2.200666990617098e-06, + "loss": 1.3894, + "step": 5840 + }, + { + "epoch": 1.73, + "learning_rate": 2.153165398837009e-06, + "loss": 1.3931, + "step": 5850 + }, + { + "epoch": 1.74, + "learning_rate": 2.1061590004603978e-06, + "loss": 1.3775, + "step": 5860 + }, + { + "epoch": 1.74, + "learning_rate": 2.059648814326806e-06, + "loss": 1.3897, + "step": 5870 + }, + { + "epoch": 1.74, + "learning_rate": 2.013635848520626e-06, + "loss": 1.3919, + "step": 5880 + }, + { + "epoch": 1.75, + "learning_rate": 1.9681211003492543e-06, + "loss": 1.4191, + "step": 5890 + }, + { + "epoch": 1.75, + "learning_rate": 1.923105556321475e-06, + "loss": 1.4027, + "step": 5900 + }, + { + "epoch": 1.75, + "learning_rate": 1.8785901921260784e-06, + "loss": 1.3767, + "step": 5910 + }, + { + "epoch": 1.75, + "learning_rate": 1.8345759726107193e-06, + "loss": 1.3801, + "step": 5920 + }, + { + "epoch": 1.76, + "learning_rate": 1.7910638517609962e-06, + "loss": 1.3881, + "step": 5930 + }, + { + "epoch": 1.76, + "learning_rate": 1.748054772679772e-06, + "loss": 1.3915, + "step": 5940 + }, + { + "epoch": 1.76, + "learning_rate": 1.705549667566747e-06, + "loss": 1.403, + "step": 5950 + }, + { + "epoch": 1.77, + "learning_rate": 1.6635494576982353e-06, + "loss": 1.3882, + "step": 5960 + }, + { + "epoch": 1.77, + "learning_rate": 1.6220550534072094e-06, + "loss": 1.3902, + "step": 5970 + }, + { + "epoch": 1.77, + "learning_rate": 1.5810673540635702e-06, + "loss": 1.3928, + "step": 5980 + }, + { + "epoch": 1.77, + "learning_rate": 1.540587248054645e-06, + "loss": 1.3819, + "step": 5990 + }, + { + "epoch": 1.78, + "learning_rate": 1.5006156127659348e-06, + "loss": 1.3948, + "step": 6000 + } + ], + "max_steps": 6748, + "num_train_epochs": 2, + "total_flos": 1.0385926229688582e+19, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-6000/training_args.bin b/checkpoint-6000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..406ab5b628f223bfcd63d70185fb1bc0973e19c4 --- /dev/null +++ b/checkpoint-6000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77681af64e1f04ae2b28b063de632629c209cd2338ce2449c3e014f309b6088a +size 3298 diff --git a/finetuning_args.json b/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..fafc24bcd05e0bda0201b5a7198b067dab53f435 --- /dev/null +++ b/finetuning_args.json @@ -0,0 +1,12 @@ +{ + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "W_pack" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3 +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..89cd4d5475ee0b51acb980fd4c9619ba51d6b225 --- /dev/null +++ b/train_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 2.0, + "train_loss": 1.4147593358881598, + "train_runtime": 86303.1376, + "train_samples_per_second": 7.507, + "train_steps_per_second": 0.078 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..3bf84954ba99e393460828eb990c3f9e0c02e993 --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,675 @@ +{"current_steps": 10, "total_steps": 6748, "loss": 1.8898, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9999729068921297e-05, "epoch": 0.0, "percentage": 0.15, "elapsed_time": "0:02:08", "remaining_time": "1 day, 0:00:51"} +{"current_steps": 20, "total_steps": 6748, "loss": 1.7273, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9998916281557476e-05, "epoch": 0.01, "percentage": 0.3, "elapsed_time": "0:04:19", "remaining_time": "1 day, 0:12:27"} +{"current_steps": 30, "total_steps": 6748, "loss": 1.6799, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999756165552527e-05, "epoch": 0.01, "percentage": 0.44, "elapsed_time": "0:06:25", "remaining_time": "23:57:29"} +{"current_steps": 40, "total_steps": 6748, "loss": 1.6431, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999566522018553e-05, "epoch": 0.01, "percentage": 0.59, "elapsed_time": "0:08:29", "remaining_time": "23:43:29"} +{"current_steps": 50, "total_steps": 6748, "loss": 1.6153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999322701664249e-05, "epoch": 0.01, "percentage": 0.74, "elapsed_time": "0:10:35", "remaining_time": "23:38:38"} +{"current_steps": 60, "total_steps": 6748, "loss": 1.5933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9990247097742984e-05, "epoch": 0.02, "percentage": 0.89, "elapsed_time": "0:12:38", "remaining_time": "23:29:48"} +{"current_steps": 70, "total_steps": 6748, "loss": 1.5913, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9986725528075205e-05, "epoch": 0.02, "percentage": 1.04, "elapsed_time": "0:14:46", "remaining_time": "23:29:49"} +{"current_steps": 80, "total_steps": 6748, "loss": 1.5434, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.998266238396737e-05, "epoch": 0.02, "percentage": 1.19, "elapsed_time": "0:16:55", "remaining_time": "23:30:54"} +{"current_steps": 90, "total_steps": 6748, "loss": 1.5304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.997805775348605e-05, "epoch": 0.03, "percentage": 1.33, "elapsed_time": "0:19:05", "remaining_time": "23:32:25"} +{"current_steps": 100, "total_steps": 6748, "loss": 1.5531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.997291173643424e-05, "epoch": 0.03, "percentage": 1.48, "elapsed_time": "0:21:08", "remaining_time": "23:25:12"} +{"current_steps": 110, "total_steps": 6748, "loss": 1.5446, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.996722444434921e-05, "epoch": 0.03, "percentage": 1.63, "elapsed_time": "0:23:14", "remaining_time": "23:22:35"} +{"current_steps": 120, "total_steps": 6748, "loss": 1.5352, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.99609960005001e-05, "epoch": 0.04, "percentage": 1.78, "elapsed_time": "0:25:26", "remaining_time": "23:25:08"} +{"current_steps": 130, "total_steps": 6748, "loss": 1.5303, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.995422653988524e-05, "epoch": 0.04, "percentage": 1.93, "elapsed_time": "0:27:40", "remaining_time": "23:28:42"} +{"current_steps": 140, "total_steps": 6748, "loss": 1.5449, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.994691620922919e-05, "epoch": 0.04, "percentage": 2.07, "elapsed_time": "0:29:45", "remaining_time": "23:24:45"} +{"current_steps": 150, "total_steps": 6748, "loss": 1.5114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.993906516697964e-05, "epoch": 0.04, "percentage": 2.22, "elapsed_time": "0:31:54", "remaining_time": "23:23:54"} +{"current_steps": 160, "total_steps": 6748, "loss": 1.5043, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9930673583303865e-05, "epoch": 0.05, "percentage": 2.37, "elapsed_time": "0:34:04", "remaining_time": "23:23:08"} +{"current_steps": 170, "total_steps": 6748, "loss": 1.5476, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.992174164008515e-05, "epoch": 0.05, "percentage": 2.52, "elapsed_time": "0:36:13", "remaining_time": "23:21:46"} +{"current_steps": 180, "total_steps": 6748, "loss": 1.5107, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.991226953091877e-05, "epoch": 0.05, "percentage": 2.67, "elapsed_time": "0:38:18", "remaining_time": "23:17:31"} +{"current_steps": 190, "total_steps": 6748, "loss": 1.5104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9902257461107824e-05, "epoch": 0.06, "percentage": 2.82, "elapsed_time": "0:40:24", "remaining_time": "23:14:47"} +{"current_steps": 200, "total_steps": 6748, "loss": 1.5298, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9891705647658795e-05, "epoch": 0.06, "percentage": 2.96, "elapsed_time": "0:42:27", "remaining_time": "23:09:59"} +{"current_steps": 210, "total_steps": 6748, "loss": 1.4907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.988061431927681e-05, "epoch": 0.06, "percentage": 3.11, "elapsed_time": "0:44:36", "remaining_time": "23:09:00"} +{"current_steps": 220, "total_steps": 6748, "loss": 1.5127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.986898371636071e-05, "epoch": 0.07, "percentage": 3.26, "elapsed_time": "0:46:53", "remaining_time": "23:11:31"} +{"current_steps": 230, "total_steps": 6748, "loss": 1.5037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.985681409099784e-05, "epoch": 0.07, "percentage": 3.41, "elapsed_time": "0:49:01", "remaining_time": "23:09:14"} +{"current_steps": 240, "total_steps": 6748, "loss": 1.5029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.984410570695858e-05, "epoch": 0.07, "percentage": 3.56, "elapsed_time": "0:51:07", "remaining_time": "23:06:31"} +{"current_steps": 250, "total_steps": 6748, "loss": 1.4725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.983085883969063e-05, "epoch": 0.07, "percentage": 3.7, "elapsed_time": "0:53:15", "remaining_time": "23:04:17"} +{"current_steps": 260, "total_steps": 6748, "loss": 1.5148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.981707377631303e-05, "epoch": 0.08, "percentage": 3.85, "elapsed_time": "0:55:24", "remaining_time": "23:02:40"} +{"current_steps": 270, "total_steps": 6748, "loss": 1.4993, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9802750815609936e-05, "epoch": 0.08, "percentage": 4.0, "elapsed_time": "0:57:37", "remaining_time": "23:02:29"} +{"current_steps": 280, "total_steps": 6748, "loss": 1.5006, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.978789026802419e-05, "epoch": 0.08, "percentage": 4.15, "elapsed_time": "0:59:46", "remaining_time": "23:00:42"} +{"current_steps": 290, "total_steps": 6748, "loss": 1.4885, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9772492455650494e-05, "epoch": 0.09, "percentage": 4.3, "elapsed_time": "1:01:59", "remaining_time": "23:00:36"} +{"current_steps": 300, "total_steps": 6748, "loss": 1.4898, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.975655771222855e-05, "epoch": 0.09, "percentage": 4.45, "elapsed_time": "1:04:05", "remaining_time": "22:57:32"} +{"current_steps": 310, "total_steps": 6748, "loss": 1.4906, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9740086383135706e-05, "epoch": 0.09, "percentage": 4.59, "elapsed_time": "1:06:15", "remaining_time": "22:55:58"} +{"current_steps": 320, "total_steps": 6748, "loss": 1.4796, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.97230788253796e-05, "epoch": 0.09, "percentage": 4.74, "elapsed_time": "1:08:22", "remaining_time": "22:53:30"} +{"current_steps": 330, "total_steps": 6748, "loss": 1.4861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.970553540759028e-05, "epoch": 0.1, "percentage": 4.89, "elapsed_time": "1:10:28", "remaining_time": "22:50:36"} +{"current_steps": 340, "total_steps": 6748, "loss": 1.4827, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.968745651001231e-05, "epoch": 0.1, "percentage": 5.04, "elapsed_time": "1:12:37", "remaining_time": "22:48:41"} +{"current_steps": 350, "total_steps": 6748, "loss": 1.4884, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9668842524496526e-05, "epoch": 0.1, "percentage": 5.19, "elapsed_time": "1:14:50", "remaining_time": "22:48:04"} +{"current_steps": 360, "total_steps": 6748, "loss": 1.4873, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.964969385449149e-05, "epoch": 0.11, "percentage": 5.33, "elapsed_time": "1:17:02", "remaining_time": "22:46:56"} +{"current_steps": 370, "total_steps": 6748, "loss": 1.4848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.96300109150348e-05, "epoch": 0.11, "percentage": 5.48, "elapsed_time": "1:19:12", "remaining_time": "22:45:26"} +{"current_steps": 380, "total_steps": 6748, "loss": 1.4881, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.960979413274404e-05, "epoch": 0.11, "percentage": 5.63, "elapsed_time": "1:21:20", "remaining_time": "22:43:07"} +{"current_steps": 390, "total_steps": 6748, "loss": 1.4618, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9589043945807594e-05, "epoch": 0.12, "percentage": 5.78, "elapsed_time": "1:23:31", "remaining_time": "22:41:36"} +{"current_steps": 400, "total_steps": 6748, "loss": 1.4858, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9567760803975105e-05, "epoch": 0.12, "percentage": 5.93, "elapsed_time": "1:25:33", "remaining_time": "22:37:54"} +{"current_steps": 410, "total_steps": 6748, "loss": 1.4777, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.954594516854773e-05, "epoch": 0.12, "percentage": 6.08, "elapsed_time": "1:27:38", "remaining_time": "22:34:54"} +{"current_steps": 420, "total_steps": 6748, "loss": 1.4828, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.952359751236817e-05, "epoch": 0.12, "percentage": 6.22, "elapsed_time": "1:29:42", "remaining_time": "22:31:43"} +{"current_steps": 430, "total_steps": 6748, "loss": 1.4571, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.950071831981038e-05, "epoch": 0.13, "percentage": 6.37, "elapsed_time": "1:31:49", "remaining_time": "22:29:18"} +{"current_steps": 440, "total_steps": 6748, "loss": 1.4724, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9477308086769117e-05, "epoch": 0.13, "percentage": 6.52, "elapsed_time": "1:33:54", "remaining_time": "22:26:21"} +{"current_steps": 450, "total_steps": 6748, "loss": 1.4771, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.945336732064915e-05, "epoch": 0.13, "percentage": 6.67, "elapsed_time": "1:36:00", "remaining_time": "22:23:40"} +{"current_steps": 460, "total_steps": 6748, "loss": 1.4604, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9428896540354294e-05, "epoch": 0.14, "percentage": 6.82, "elapsed_time": "1:38:16", "remaining_time": "22:23:21"} +{"current_steps": 470, "total_steps": 6748, "loss": 1.4815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.940389627627613e-05, "epoch": 0.14, "percentage": 6.97, "elapsed_time": "1:40:25", "remaining_time": "22:21:19"} +{"current_steps": 480, "total_steps": 6748, "loss": 1.4859, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.937836707028255e-05, "epoch": 0.14, "percentage": 7.11, "elapsed_time": "1:42:29", "remaining_time": "22:18:26"} +{"current_steps": 490, "total_steps": 6748, "loss": 1.4715, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.935230947570597e-05, "epoch": 0.15, "percentage": 7.26, "elapsed_time": "1:44:36", "remaining_time": "22:16:01"} +{"current_steps": 500, "total_steps": 6748, "loss": 1.4759, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.932572405733137e-05, "epoch": 0.15, "percentage": 7.41, "elapsed_time": "1:46:44", "remaining_time": "22:13:53"} +{"current_steps": 510, "total_steps": 6748, "loss": 1.4678, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.929861139138404e-05, "epoch": 0.15, "percentage": 7.56, "elapsed_time": "1:48:52", "remaining_time": "22:11:47"} +{"current_steps": 520, "total_steps": 6748, "loss": 1.4754, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9270972065517083e-05, "epoch": 0.15, "percentage": 7.71, "elapsed_time": "1:51:01", "remaining_time": "22:09:47"} +{"current_steps": 530, "total_steps": 6748, "loss": 1.462, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.924280667879869e-05, "epoch": 0.16, "percentage": 7.85, "elapsed_time": "1:53:14", "remaining_time": "22:08:35"} +{"current_steps": 540, "total_steps": 6748, "loss": 1.4704, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.921411584169915e-05, "epoch": 0.16, "percentage": 8.0, "elapsed_time": "1:55:27", "remaining_time": "22:07:22"} +{"current_steps": 550, "total_steps": 6748, "loss": 1.4661, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.918490017607761e-05, "epoch": 0.16, "percentage": 8.15, "elapsed_time": "1:57:37", "remaining_time": "22:05:34"} +{"current_steps": 560, "total_steps": 6748, "loss": 1.471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.915516031516863e-05, "epoch": 0.17, "percentage": 8.3, "elapsed_time": "1:59:45", "remaining_time": "22:03:23"} +{"current_steps": 570, "total_steps": 6748, "loss": 1.451, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.912489690356841e-05, "epoch": 0.17, "percentage": 8.45, "elapsed_time": "2:01:51", "remaining_time": "22:00:47"} +{"current_steps": 580, "total_steps": 6748, "loss": 1.4411, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.909411059722084e-05, "epoch": 0.17, "percentage": 8.6, "elapsed_time": "2:04:02", "remaining_time": "21:59:02"} +{"current_steps": 590, "total_steps": 6748, "loss": 1.456, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9062802063403316e-05, "epoch": 0.17, "percentage": 8.74, "elapsed_time": "2:06:07", "remaining_time": "21:56:28"} +{"current_steps": 600, "total_steps": 6748, "loss": 1.4678, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.90309719807122e-05, "epoch": 0.18, "percentage": 8.89, "elapsed_time": "2:08:16", "remaining_time": "21:54:28"} +{"current_steps": 610, "total_steps": 6748, "loss": 1.479, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8998621039048205e-05, "epoch": 0.18, "percentage": 9.04, "elapsed_time": "2:10:29", "remaining_time": "21:53:03"} +{"current_steps": 620, "total_steps": 6748, "loss": 1.4471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.896574993960136e-05, "epoch": 0.18, "percentage": 9.19, "elapsed_time": "2:12:39", "remaining_time": "21:51:12"} +{"current_steps": 630, "total_steps": 6748, "loss": 1.453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.893235939483587e-05, "epoch": 0.19, "percentage": 9.34, "elapsed_time": "2:14:47", "remaining_time": "21:48:59"} +{"current_steps": 640, "total_steps": 6748, "loss": 1.4696, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8898450128474626e-05, "epoch": 0.19, "percentage": 9.48, "elapsed_time": "2:16:51", "remaining_time": "21:46:13"} +{"current_steps": 650, "total_steps": 6748, "loss": 1.4526, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.886402287548357e-05, "epoch": 0.19, "percentage": 9.63, "elapsed_time": "2:19:00", "remaining_time": "21:44:03"} +{"current_steps": 660, "total_steps": 6748, "loss": 1.4429, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8829078382055725e-05, "epoch": 0.2, "percentage": 9.78, "elapsed_time": "2:21:05", "remaining_time": "21:41:31"} +{"current_steps": 670, "total_steps": 6748, "loss": 1.4491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8793617405595025e-05, "epoch": 0.2, "percentage": 9.93, "elapsed_time": "2:23:12", "remaining_time": "21:39:06"} +{"current_steps": 680, "total_steps": 6748, "loss": 1.4411, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8757640714699924e-05, "epoch": 0.2, "percentage": 10.08, "elapsed_time": "2:25:25", "remaining_time": "21:37:45"} +{"current_steps": 690, "total_steps": 6748, "loss": 1.4543, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.872114908914671e-05, "epoch": 0.2, "percentage": 10.23, "elapsed_time": "2:27:32", "remaining_time": "21:35:24"} +{"current_steps": 700, "total_steps": 6748, "loss": 1.4556, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8684143319872636e-05, "epoch": 0.21, "percentage": 10.37, "elapsed_time": "2:29:41", "remaining_time": "21:33:18"} +{"current_steps": 710, "total_steps": 6748, "loss": 1.4506, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.864662420895873e-05, "epoch": 0.21, "percentage": 10.52, "elapsed_time": "2:31:44", "remaining_time": "21:30:27"} +{"current_steps": 720, "total_steps": 6748, "loss": 1.4671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.860859256961244e-05, "epoch": 0.21, "percentage": 10.67, "elapsed_time": "2:33:51", "remaining_time": "21:28:06"} +{"current_steps": 730, "total_steps": 6748, "loss": 1.4469, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.857004922615002e-05, "epoch": 0.22, "percentage": 10.82, "elapsed_time": "2:36:03", "remaining_time": "21:26:27"} +{"current_steps": 740, "total_steps": 6748, "loss": 1.4554, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8530995013978645e-05, "epoch": 0.22, "percentage": 10.97, "elapsed_time": "2:38:09", "remaining_time": "21:24:07"} +{"current_steps": 750, "total_steps": 6748, "loss": 1.4671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.84914307795783e-05, "epoch": 0.22, "percentage": 11.11, "elapsed_time": "2:40:18", "remaining_time": "21:21:59"} +{"current_steps": 760, "total_steps": 6748, "loss": 1.445, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.845135738048343e-05, "epoch": 0.23, "percentage": 11.26, "elapsed_time": "2:42:28", "remaining_time": "21:20:07"} +{"current_steps": 770, "total_steps": 6748, "loss": 1.4469, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.841077568526439e-05, "epoch": 0.23, "percentage": 11.41, "elapsed_time": "2:44:35", "remaining_time": "21:17:46"} +{"current_steps": 780, "total_steps": 6748, "loss": 1.4677, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.836968657350857e-05, "epoch": 0.23, "percentage": 11.56, "elapsed_time": "2:46:42", "remaining_time": "21:15:29"} +{"current_steps": 790, "total_steps": 6748, "loss": 1.4653, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.832809093580135e-05, "epoch": 0.23, "percentage": 11.71, "elapsed_time": "2:48:39", "remaining_time": "21:12:01"} +{"current_steps": 800, "total_steps": 6748, "loss": 1.4342, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8285989673706826e-05, "epoch": 0.24, "percentage": 11.86, "elapsed_time": "2:50:56", "remaining_time": "21:10:54"} +{"current_steps": 810, "total_steps": 6748, "loss": 1.458, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.824338369974822e-05, "epoch": 0.24, "percentage": 12.0, "elapsed_time": "2:53:03", "remaining_time": "21:08:39"} +{"current_steps": 820, "total_steps": 6748, "loss": 1.4541, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8200273937388126e-05, "epoch": 0.24, "percentage": 12.15, "elapsed_time": "2:55:12", "remaining_time": "21:06:35"} +{"current_steps": 830, "total_steps": 6748, "loss": 1.4324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.81566613210085e-05, "epoch": 0.25, "percentage": 12.3, "elapsed_time": "2:57:19", "remaining_time": "21:04:23"} +{"current_steps": 840, "total_steps": 6748, "loss": 1.4405, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.81125467958904e-05, "epoch": 0.25, "percentage": 12.45, "elapsed_time": "2:59:30", "remaining_time": "21:02:31"} +{"current_steps": 850, "total_steps": 6748, "loss": 1.4408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.80679313181935e-05, "epoch": 0.25, "percentage": 12.6, "elapsed_time": "3:01:38", "remaining_time": "21:00:26"} +{"current_steps": 860, "total_steps": 6748, "loss": 1.4395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8022815854935356e-05, "epoch": 0.25, "percentage": 12.74, "elapsed_time": "3:03:41", "remaining_time": "20:57:38"} +{"current_steps": 870, "total_steps": 6748, "loss": 1.4359, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.797720138397045e-05, "epoch": 0.26, "percentage": 12.89, "elapsed_time": "3:05:55", "remaining_time": "20:56:08"} +{"current_steps": 880, "total_steps": 6748, "loss": 1.442, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.793108889396902e-05, "epoch": 0.26, "percentage": 13.04, "elapsed_time": "3:08:04", "remaining_time": "20:54:08"} +{"current_steps": 890, "total_steps": 6748, "loss": 1.4566, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7884479384395594e-05, "epoch": 0.26, "percentage": 13.19, "elapsed_time": "3:10:16", "remaining_time": "20:52:20"} +{"current_steps": 900, "total_steps": 6748, "loss": 1.4257, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7837373865487345e-05, "epoch": 0.27, "percentage": 13.34, "elapsed_time": "3:12:20", "remaining_time": "20:49:45"} +{"current_steps": 910, "total_steps": 6748, "loss": 1.4755, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.77897733582322e-05, "epoch": 0.27, "percentage": 13.49, "elapsed_time": "3:14:28", "remaining_time": "20:47:36"} +{"current_steps": 920, "total_steps": 6748, "loss": 1.4476, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.774167889434671e-05, "epoch": 0.27, "percentage": 13.63, "elapsed_time": "3:16:31", "remaining_time": "20:44:55"} +{"current_steps": 930, "total_steps": 6748, "loss": 1.4531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.769309151625366e-05, "epoch": 0.28, "percentage": 13.78, "elapsed_time": "3:18:32", "remaining_time": "20:42:03"} +{"current_steps": 940, "total_steps": 6748, "loss": 1.447, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7644012277059516e-05, "epoch": 0.28, "percentage": 13.93, "elapsed_time": "3:20:39", "remaining_time": "20:39:48"} +{"current_steps": 950, "total_steps": 6748, "loss": 1.4201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7594442240531574e-05, "epoch": 0.28, "percentage": 14.08, "elapsed_time": "3:22:49", "remaining_time": "20:37:51"} +{"current_steps": 960, "total_steps": 6748, "loss": 1.4323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.754438248107491e-05, "epoch": 0.28, "percentage": 14.23, "elapsed_time": "3:25:02", "remaining_time": "20:36:14"} +{"current_steps": 970, "total_steps": 6748, "loss": 1.4432, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7493834083709104e-05, "epoch": 0.29, "percentage": 14.37, "elapsed_time": "3:27:14", "remaining_time": "20:34:25"} +{"current_steps": 980, "total_steps": 6748, "loss": 1.4339, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7442798144044695e-05, "epoch": 0.29, "percentage": 14.52, "elapsed_time": "3:29:18", "remaining_time": "20:31:55"} +{"current_steps": 990, "total_steps": 6748, "loss": 1.4477, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.739127576825945e-05, "epoch": 0.29, "percentage": 14.67, "elapsed_time": "3:31:26", "remaining_time": "20:29:43"} +{"current_steps": 1000, "total_steps": 6748, "loss": 1.4242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.733926807307441e-05, "epoch": 0.3, "percentage": 14.82, "elapsed_time": "3:33:38", "remaining_time": "20:28:02"} +{"current_steps": 1010, "total_steps": 6748, "loss": 1.4341, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.728677618572965e-05, "epoch": 0.3, "percentage": 14.97, "elapsed_time": "3:35:50", "remaining_time": "20:26:12"} +{"current_steps": 1020, "total_steps": 6748, "loss": 1.4526, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.723380124395985e-05, "epoch": 0.3, "percentage": 15.12, "elapsed_time": "3:38:02", "remaining_time": "20:24:29"} +{"current_steps": 1030, "total_steps": 6748, "loss": 1.4402, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7180344395969675e-05, "epoch": 0.31, "percentage": 15.26, "elapsed_time": "3:40:16", "remaining_time": "20:22:51"} +{"current_steps": 1040, "total_steps": 6748, "loss": 1.4257, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.712640680040884e-05, "epoch": 0.31, "percentage": 15.41, "elapsed_time": "3:42:29", "remaining_time": "20:21:09"} +{"current_steps": 1050, "total_steps": 6748, "loss": 1.4232, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.707198962634701e-05, "epoch": 0.31, "percentage": 15.56, "elapsed_time": "3:44:42", "remaining_time": "20:19:25"} +{"current_steps": 1060, "total_steps": 6748, "loss": 1.4485, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.70170940532485e-05, "epoch": 0.31, "percentage": 15.71, "elapsed_time": "3:46:49", "remaining_time": "20:17:11"} +{"current_steps": 1070, "total_steps": 6748, "loss": 1.456, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6961721270946635e-05, "epoch": 0.32, "percentage": 15.86, "elapsed_time": "3:48:51", "remaining_time": "20:14:28"} +{"current_steps": 1080, "total_steps": 6748, "loss": 1.4555, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.690587247961804e-05, "epoch": 0.32, "percentage": 16.0, "elapsed_time": "3:51:00", "remaining_time": "20:12:24"} +{"current_steps": 1090, "total_steps": 6748, "loss": 1.4376, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.684954888975657e-05, "epoch": 0.32, "percentage": 16.15, "elapsed_time": "3:53:11", "remaining_time": "20:10:28"} +{"current_steps": 1100, "total_steps": 6748, "loss": 1.4353, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6792751722147104e-05, "epoch": 0.33, "percentage": 16.3, "elapsed_time": "3:55:21", "remaining_time": "20:08:29"} +{"current_steps": 1110, "total_steps": 6748, "loss": 1.4226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6735482207839074e-05, "epoch": 0.33, "percentage": 16.45, "elapsed_time": "3:57:29", "remaining_time": "20:06:16"} +{"current_steps": 1120, "total_steps": 6748, "loss": 1.4315, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6677741588119784e-05, "epoch": 0.33, "percentage": 16.6, "elapsed_time": "3:59:38", "remaining_time": "20:04:13"} +{"current_steps": 1130, "total_steps": 6748, "loss": 1.4303, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.66195311144875e-05, "epoch": 0.33, "percentage": 16.75, "elapsed_time": "4:01:46", "remaining_time": "20:02:02"} +{"current_steps": 1140, "total_steps": 6748, "loss": 1.4288, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6560852048624345e-05, "epoch": 0.34, "percentage": 16.89, "elapsed_time": "4:03:53", "remaining_time": "19:59:45"} +{"current_steps": 1150, "total_steps": 6748, "loss": 1.4539, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.650170566236892e-05, "epoch": 0.34, "percentage": 17.04, "elapsed_time": "4:06:02", "remaining_time": "19:57:42"} +{"current_steps": 1160, "total_steps": 6748, "loss": 1.4527, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6442093237688756e-05, "epoch": 0.34, "percentage": 17.19, "elapsed_time": "4:08:05", "remaining_time": "19:55:07"} +{"current_steps": 1170, "total_steps": 6748, "loss": 1.4406, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6382016066652556e-05, "epoch": 0.35, "percentage": 17.34, "elapsed_time": "4:10:15", "remaining_time": "19:53:04"} +{"current_steps": 1180, "total_steps": 6748, "loss": 1.4233, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.632147545140212e-05, "epoch": 0.35, "percentage": 17.49, "elapsed_time": "4:12:20", "remaining_time": "19:50:42"} +{"current_steps": 1190, "total_steps": 6748, "loss": 1.426, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.626047270412419e-05, "epoch": 0.35, "percentage": 17.63, "elapsed_time": "4:14:30", "remaining_time": "19:48:40"} +{"current_steps": 1200, "total_steps": 6748, "loss": 1.4577, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.619900914702198e-05, "epoch": 0.36, "percentage": 17.78, "elapsed_time": "4:16:38", "remaining_time": "19:46:31"} +{"current_steps": 1210, "total_steps": 6748, "loss": 1.4313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.613708611228652e-05, "epoch": 0.36, "percentage": 17.93, "elapsed_time": "4:18:42", "remaining_time": "19:44:04"} +{"current_steps": 1220, "total_steps": 6748, "loss": 1.4129, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.607470494206776e-05, "epoch": 0.36, "percentage": 18.08, "elapsed_time": "4:20:53", "remaining_time": "19:42:08"} +{"current_steps": 1230, "total_steps": 6748, "loss": 1.4368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.601186698844554e-05, "epoch": 0.36, "percentage": 18.23, "elapsed_time": "4:23:01", "remaining_time": "19:39:57"} +{"current_steps": 1240, "total_steps": 6748, "loss": 1.4342, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.594857361340021e-05, "epoch": 0.37, "percentage": 18.38, "elapsed_time": "4:25:13", "remaining_time": "19:38:06"} +{"current_steps": 1250, "total_steps": 6748, "loss": 1.4438, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.588482618878316e-05, "epoch": 0.37, "percentage": 18.52, "elapsed_time": "4:27:22", "remaining_time": "19:36:03"} +{"current_steps": 1260, "total_steps": 6748, "loss": 1.4263, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.582062609628709e-05, "epoch": 0.37, "percentage": 18.67, "elapsed_time": "4:29:33", "remaining_time": "19:34:04"} +{"current_steps": 1270, "total_steps": 6748, "loss": 1.4379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.575597472741601e-05, "epoch": 0.38, "percentage": 18.82, "elapsed_time": "4:31:40", "remaining_time": "19:31:50"} +{"current_steps": 1280, "total_steps": 6748, "loss": 1.4221, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.569087348345512e-05, "epoch": 0.38, "percentage": 18.97, "elapsed_time": "4:33:53", "remaining_time": "19:30:01"} +{"current_steps": 1290, "total_steps": 6748, "loss": 1.4414, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.562532377544046e-05, "epoch": 0.38, "percentage": 19.12, "elapsed_time": "4:36:08", "remaining_time": "19:28:19"} +{"current_steps": 1300, "total_steps": 6748, "loss": 1.4395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5559327024128265e-05, "epoch": 0.39, "percentage": 19.26, "elapsed_time": "4:38:17", "remaining_time": "19:26:15"} +{"current_steps": 1310, "total_steps": 6748, "loss": 1.4278, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.549288465996421e-05, "epoch": 0.39, "percentage": 19.41, "elapsed_time": "4:40:27", "remaining_time": "19:24:14"} +{"current_steps": 1320, "total_steps": 6748, "loss": 1.4344, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.542599812305243e-05, "epoch": 0.39, "percentage": 19.56, "elapsed_time": "4:42:30", "remaining_time": "19:21:44"} +{"current_steps": 1330, "total_steps": 6748, "loss": 1.4352, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.535866886312423e-05, "epoch": 0.39, "percentage": 19.71, "elapsed_time": "4:44:40", "remaining_time": "19:19:40"} +{"current_steps": 1340, "total_steps": 6748, "loss": 1.4133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.529089833950675e-05, "epoch": 0.4, "percentage": 19.86, "elapsed_time": "4:46:48", "remaining_time": "19:17:29"} +{"current_steps": 1350, "total_steps": 6748, "loss": 1.4506, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5222688021091266e-05, "epoch": 0.4, "percentage": 20.01, "elapsed_time": "4:48:51", "remaining_time": "19:15:01"} +{"current_steps": 1360, "total_steps": 6748, "loss": 1.4295, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5154039386301385e-05, "epoch": 0.4, "percentage": 20.15, "elapsed_time": "4:50:58", "remaining_time": "19:12:48"} +{"current_steps": 1370, "total_steps": 6748, "loss": 1.4389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5084953923061016e-05, "epoch": 0.41, "percentage": 20.3, "elapsed_time": "4:53:08", "remaining_time": "19:10:45"} +{"current_steps": 1380, "total_steps": 6748, "loss": 1.4247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5015433128762065e-05, "epoch": 0.41, "percentage": 20.45, "elapsed_time": "4:55:18", "remaining_time": "19:08:44"} +{"current_steps": 1390, "total_steps": 6748, "loss": 1.4347, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.494547851023205e-05, "epoch": 0.41, "percentage": 20.6, "elapsed_time": "4:57:25", "remaining_time": "19:06:29"} +{"current_steps": 1400, "total_steps": 6748, "loss": 1.4133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.487509158370139e-05, "epoch": 0.41, "percentage": 20.75, "elapsed_time": "4:59:34", "remaining_time": "19:04:21"} +{"current_steps": 1410, "total_steps": 6748, "loss": 1.4296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.480427387477056e-05, "epoch": 0.42, "percentage": 20.9, "elapsed_time": "5:01:40", "remaining_time": "19:02:05"} +{"current_steps": 1420, "total_steps": 6748, "loss": 1.4353, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.473302691837702e-05, "epoch": 0.42, "percentage": 21.04, "elapsed_time": "5:03:45", "remaining_time": "18:59:45"} +{"current_steps": 1430, "total_steps": 6748, "loss": 1.4377, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.466135225876194e-05, "epoch": 0.42, "percentage": 21.19, "elapsed_time": "5:05:54", "remaining_time": "18:57:37"} +{"current_steps": 1440, "total_steps": 6748, "loss": 1.4168, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.458925144943676e-05, "epoch": 0.43, "percentage": 21.34, "elapsed_time": "5:08:02", "remaining_time": "18:55:28"} +{"current_steps": 1450, "total_steps": 6748, "loss": 1.4334, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.451672605314948e-05, "epoch": 0.43, "percentage": 21.49, "elapsed_time": "5:10:11", "remaining_time": "18:53:22"} +{"current_steps": 1460, "total_steps": 6748, "loss": 1.44, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.444377764185082e-05, "epoch": 0.43, "percentage": 21.64, "elapsed_time": "5:12:17", "remaining_time": "18:51:05"} +{"current_steps": 1470, "total_steps": 6748, "loss": 1.4375, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.43704077966601e-05, "epoch": 0.44, "percentage": 21.78, "elapsed_time": "5:14:29", "remaining_time": "18:49:10"} +{"current_steps": 1480, "total_steps": 6748, "loss": 1.447, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4296618107831036e-05, "epoch": 0.44, "percentage": 21.93, "elapsed_time": "5:16:38", "remaining_time": "18:47:02"} +{"current_steps": 1490, "total_steps": 6748, "loss": 1.4151, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.422241017471722e-05, "epoch": 0.44, "percentage": 22.08, "elapsed_time": "5:18:51", "remaining_time": "18:45:12"} +{"current_steps": 1500, "total_steps": 6748, "loss": 1.4388, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.414778560573749e-05, "epoch": 0.44, "percentage": 22.23, "elapsed_time": "5:20:58", "remaining_time": "18:42:59"} +{"current_steps": 1510, "total_steps": 6748, "loss": 1.4228, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4072746018341036e-05, "epoch": 0.45, "percentage": 22.38, "elapsed_time": "5:23:16", "remaining_time": "18:41:22"} +{"current_steps": 1520, "total_steps": 6748, "loss": 1.4104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.399729303897238e-05, "epoch": 0.45, "percentage": 22.53, "elapsed_time": "5:25:23", "remaining_time": "18:39:09"} +{"current_steps": 1530, "total_steps": 6748, "loss": 1.4441, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.392142830303608e-05, "epoch": 0.45, "percentage": 22.67, "elapsed_time": "5:27:33", "remaining_time": "18:37:09"} +{"current_steps": 1540, "total_steps": 6748, "loss": 1.4282, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.384515345486131e-05, "epoch": 0.46, "percentage": 22.82, "elapsed_time": "5:29:42", "remaining_time": "18:35:00"} +{"current_steps": 1550, "total_steps": 6748, "loss": 1.4271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.376847014766623e-05, "epoch": 0.46, "percentage": 22.97, "elapsed_time": "5:31:53", "remaining_time": "18:32:59"} +{"current_steps": 1560, "total_steps": 6748, "loss": 1.4223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.369138004352212e-05, "epoch": 0.46, "percentage": 23.12, "elapsed_time": "5:34:02", "remaining_time": "18:30:52"} +{"current_steps": 1570, "total_steps": 6748, "loss": 1.425, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3613884813317406e-05, "epoch": 0.47, "percentage": 23.27, "elapsed_time": "5:36:07", "remaining_time": "18:28:33"} +{"current_steps": 1580, "total_steps": 6748, "loss": 1.4392, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3535986136721377e-05, "epoch": 0.47, "percentage": 23.41, "elapsed_time": "5:38:15", "remaining_time": "18:26:25"} +{"current_steps": 1590, "total_steps": 6748, "loss": 1.4097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3457685702147834e-05, "epoch": 0.47, "percentage": 23.56, "elapsed_time": "5:40:17", "remaining_time": "18:23:54"} +{"current_steps": 1600, "total_steps": 6748, "loss": 1.4405, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3378985206718484e-05, "epoch": 0.47, "percentage": 23.71, "elapsed_time": "5:42:21", "remaining_time": "18:21:31"} +{"current_steps": 1610, "total_steps": 6748, "loss": 1.4311, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.329988635622611e-05, "epoch": 0.48, "percentage": 23.86, "elapsed_time": "5:44:29", "remaining_time": "18:19:22"} +{"current_steps": 1620, "total_steps": 6748, "loss": 1.4358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.322039086509769e-05, "epoch": 0.48, "percentage": 24.01, "elapsed_time": "5:46:34", "remaining_time": "18:17:03"} +{"current_steps": 1630, "total_steps": 6748, "loss": 1.4114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3140500456357145e-05, "epoch": 0.48, "percentage": 24.16, "elapsed_time": "5:48:41", "remaining_time": "18:14:51"} +{"current_steps": 1640, "total_steps": 6748, "loss": 1.4165, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.306021686158805e-05, "epoch": 0.49, "percentage": 24.3, "elapsed_time": "5:50:43", "remaining_time": "18:12:21"} +{"current_steps": 1650, "total_steps": 6748, "loss": 1.4309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.297954182089609e-05, "epoch": 0.49, "percentage": 24.45, "elapsed_time": "5:52:50", "remaining_time": "18:10:10"} +{"current_steps": 1660, "total_steps": 6748, "loss": 1.4215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.289847708287129e-05, "epoch": 0.49, "percentage": 24.6, "elapsed_time": "5:54:55", "remaining_time": "18:07:52"} +{"current_steps": 1670, "total_steps": 6748, "loss": 1.4124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2817024404550246e-05, "epoch": 0.49, "percentage": 24.75, "elapsed_time": "5:57:05", "remaining_time": "18:05:47"} +{"current_steps": 1680, "total_steps": 6748, "loss": 1.4001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2735185551377895e-05, "epoch": 0.5, "percentage": 24.9, "elapsed_time": "5:59:11", "remaining_time": "18:03:34"} +{"current_steps": 1690, "total_steps": 6748, "loss": 1.4302, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.265296229716935e-05, "epoch": 0.5, "percentage": 25.04, "elapsed_time": "6:01:20", "remaining_time": "18:01:27"} +{"current_steps": 1700, "total_steps": 6748, "loss": 1.4211, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.25703564240714e-05, "epoch": 0.5, "percentage": 25.19, "elapsed_time": "6:03:30", "remaining_time": "17:59:23"} +{"current_steps": 1710, "total_steps": 6748, "loss": 1.4423, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2487369722523906e-05, "epoch": 0.51, "percentage": 25.34, "elapsed_time": "6:05:30", "remaining_time": "17:56:52"} +{"current_steps": 1720, "total_steps": 6748, "loss": 1.4299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.240400399122101e-05, "epoch": 0.51, "percentage": 25.49, "elapsed_time": "6:07:41", "remaining_time": "17:54:51"} +{"current_steps": 1730, "total_steps": 6748, "loss": 1.4214, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.232026103707209e-05, "epoch": 0.51, "percentage": 25.64, "elapsed_time": "6:09:52", "remaining_time": "17:52:51"} +{"current_steps": 1740, "total_steps": 6748, "loss": 1.4348, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.223614267516268e-05, "epoch": 0.52, "percentage": 25.79, "elapsed_time": "6:11:57", "remaining_time": "17:50:33"} +{"current_steps": 1750, "total_steps": 6748, "loss": 1.4315, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.215165072871505e-05, "epoch": 0.52, "percentage": 25.93, "elapsed_time": "6:14:04", "remaining_time": "17:48:22"} +{"current_steps": 1760, "total_steps": 6748, "loss": 1.4098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.206678702904874e-05, "epoch": 0.52, "percentage": 26.08, "elapsed_time": "6:16:14", "remaining_time": "17:46:18"} +{"current_steps": 1770, "total_steps": 6748, "loss": 1.4242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.198155341554084e-05, "epoch": 0.52, "percentage": 26.23, "elapsed_time": "6:18:24", "remaining_time": "17:44:13"} +{"current_steps": 1780, "total_steps": 6748, "loss": 1.4272, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1895951735586145e-05, "epoch": 0.53, "percentage": 26.38, "elapsed_time": "6:20:27", "remaining_time": "17:41:51"} +{"current_steps": 1790, "total_steps": 6748, "loss": 1.4452, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1809983844557085e-05, "epoch": 0.53, "percentage": 26.53, "elapsed_time": "6:22:37", "remaining_time": "17:39:47"} +{"current_steps": 1800, "total_steps": 6748, "loss": 1.431, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.172365160576355e-05, "epoch": 0.53, "percentage": 26.67, "elapsed_time": "6:24:43", "remaining_time": "17:37:34"} +{"current_steps": 1810, "total_steps": 6748, "loss": 1.4389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.163695689041245e-05, "epoch": 0.54, "percentage": 26.82, "elapsed_time": "6:26:47", "remaining_time": "17:35:15"} +{"current_steps": 1820, "total_steps": 6748, "loss": 1.413, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.154990157756722e-05, "epoch": 0.54, "percentage": 26.97, "elapsed_time": "6:29:00", "remaining_time": "17:33:18"} +{"current_steps": 1830, "total_steps": 6748, "loss": 1.3893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1462487554107036e-05, "epoch": 0.54, "percentage": 27.12, "elapsed_time": "6:31:11", "remaining_time": "17:31:18"} +{"current_steps": 1840, "total_steps": 6748, "loss": 1.4052, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.137471671468596e-05, "epoch": 0.55, "percentage": 27.27, "elapsed_time": "6:33:24", "remaining_time": "17:29:21"} +{"current_steps": 1850, "total_steps": 6748, "loss": 1.4173, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.128659096169183e-05, "epoch": 0.55, "percentage": 27.42, "elapsed_time": "6:35:28", "remaining_time": "17:27:02"} +{"current_steps": 1860, "total_steps": 6748, "loss": 1.4012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1198112205205096e-05, "epoch": 0.55, "percentage": 27.56, "elapsed_time": "6:37:32", "remaining_time": "17:24:42"} +{"current_steps": 1870, "total_steps": 6748, "loss": 1.4119, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.110928236295734e-05, "epoch": 0.55, "percentage": 27.71, "elapsed_time": "6:39:41", "remaining_time": "17:22:37"} +{"current_steps": 1880, "total_steps": 6748, "loss": 1.4111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.102010336028975e-05, "epoch": 0.56, "percentage": 27.86, "elapsed_time": "6:41:56", "remaining_time": "17:20:47"} +{"current_steps": 1890, "total_steps": 6748, "loss": 1.4156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0930577130111424e-05, "epoch": 0.56, "percentage": 28.01, "elapsed_time": "6:44:00", "remaining_time": "17:18:27"} +{"current_steps": 1900, "total_steps": 6748, "loss": 1.4419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.084070561285739e-05, "epoch": 0.56, "percentage": 28.16, "elapsed_time": "6:46:03", "remaining_time": "17:16:04"} +{"current_steps": 1910, "total_steps": 6748, "loss": 1.4121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0750490756446624e-05, "epoch": 0.57, "percentage": 28.3, "elapsed_time": "6:48:12", "remaining_time": "17:13:58"} +{"current_steps": 1920, "total_steps": 6748, "loss": 1.4204, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0659934516239795e-05, "epoch": 0.57, "percentage": 28.45, "elapsed_time": "6:50:18", "remaining_time": "17:11:44"} +{"current_steps": 1930, "total_steps": 6748, "loss": 1.4032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.056903885499689e-05, "epoch": 0.57, "percentage": 28.6, "elapsed_time": "6:52:24", "remaining_time": "17:09:31"} +{"current_steps": 1940, "total_steps": 6748, "loss": 1.4207, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.047780574283466e-05, "epoch": 0.57, "percentage": 28.75, "elapsed_time": "6:54:32", "remaining_time": "17:07:22"} +{"current_steps": 1950, "total_steps": 6748, "loss": 1.4095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.038623715718397e-05, "epoch": 0.58, "percentage": 28.9, "elapsed_time": "6:56:43", "remaining_time": "17:05:20"} +{"current_steps": 1960, "total_steps": 6748, "loss": 1.4228, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.029433508274686e-05, "epoch": 0.58, "percentage": 29.05, "elapsed_time": "6:58:50", "remaining_time": "17:03:09"} +{"current_steps": 1970, "total_steps": 6748, "loss": 1.4141, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0202101511453586e-05, "epoch": 0.58, "percentage": 29.19, "elapsed_time": "7:00:54", "remaining_time": "17:00:50"} +{"current_steps": 1980, "total_steps": 6748, "loss": 1.4323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.010953844241943e-05, "epoch": 0.59, "percentage": 29.34, "elapsed_time": "7:02:57", "remaining_time": "16:58:31"} +{"current_steps": 1990, "total_steps": 6748, "loss": 1.4087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.001664788190135e-05, "epoch": 0.59, "percentage": 29.49, "elapsed_time": "7:05:06", "remaining_time": "16:56:25"} +{"current_steps": 2000, "total_steps": 6748, "loss": 1.4186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.992343184325453e-05, "epoch": 0.59, "percentage": 29.64, "elapsed_time": "7:07:08", "remaining_time": "16:54:02"} +{"current_steps": 2010, "total_steps": 6748, "loss": 1.4264, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.982989234688873e-05, "epoch": 0.6, "percentage": 29.79, "elapsed_time": "7:09:21", "remaining_time": "16:52:05"} +{"current_steps": 2020, "total_steps": 6748, "loss": 1.4417, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.973603142022448e-05, "epoch": 0.6, "percentage": 29.93, "elapsed_time": "7:11:27", "remaining_time": "16:49:52"} +{"current_steps": 2030, "total_steps": 6748, "loss": 1.4075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.964185109764915e-05, "epoch": 0.6, "percentage": 30.08, "elapsed_time": "7:13:38", "remaining_time": "16:47:51"} +{"current_steps": 2040, "total_steps": 6748, "loss": 1.4143, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.954735342047285e-05, "epoch": 0.6, "percentage": 30.23, "elapsed_time": "7:15:44", "remaining_time": "16:45:36"} +{"current_steps": 2050, "total_steps": 6748, "loss": 1.4176, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.945254043688419e-05, "epoch": 0.61, "percentage": 30.38, "elapsed_time": "7:17:58", "remaining_time": "16:43:43"} +{"current_steps": 2060, "total_steps": 6748, "loss": 1.4214, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.935741420190587e-05, "epoch": 0.61, "percentage": 30.53, "elapsed_time": "7:20:07", "remaining_time": "16:41:36"} +{"current_steps": 2070, "total_steps": 6748, "loss": 1.4256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.926197677735018e-05, "epoch": 0.61, "percentage": 30.68, "elapsed_time": "7:22:15", "remaining_time": "16:39:27"} +{"current_steps": 2080, "total_steps": 6748, "loss": 1.4075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9166230231774276e-05, "epoch": 0.62, "percentage": 30.82, "elapsed_time": "7:24:26", "remaining_time": "16:37:26"} +{"current_steps": 2090, "total_steps": 6748, "loss": 1.3887, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9070176640435335e-05, "epoch": 0.62, "percentage": 30.97, "elapsed_time": "7:26:33", "remaining_time": "16:35:14"} +{"current_steps": 2100, "total_steps": 6748, "loss": 1.4225, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.897381808524562e-05, "epoch": 0.62, "percentage": 31.12, "elapsed_time": "7:28:37", "remaining_time": "16:32:56"} +{"current_steps": 2110, "total_steps": 6748, "loss": 1.4114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.887715665472729e-05, "epoch": 0.63, "percentage": 31.27, "elapsed_time": "7:30:45", "remaining_time": "16:30:49"} +{"current_steps": 2120, "total_steps": 6748, "loss": 1.4316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8780194443967226e-05, "epoch": 0.63, "percentage": 31.42, "elapsed_time": "7:32:47", "remaining_time": "16:28:27"} +{"current_steps": 2130, "total_steps": 6748, "loss": 1.4168, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8682933554571524e-05, "epoch": 0.63, "percentage": 31.56, "elapsed_time": "7:34:57", "remaining_time": "16:26:22"} +{"current_steps": 2140, "total_steps": 6748, "loss": 1.4237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.858537609461999e-05, "epoch": 0.63, "percentage": 31.71, "elapsed_time": "7:37:12", "remaining_time": "16:24:30"} +{"current_steps": 2150, "total_steps": 6748, "loss": 1.4373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8487524178620464e-05, "epoch": 0.64, "percentage": 31.86, "elapsed_time": "7:39:15", "remaining_time": "16:22:11"} +{"current_steps": 2160, "total_steps": 6748, "loss": 1.4089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.838937992746295e-05, "epoch": 0.64, "percentage": 32.01, "elapsed_time": "7:41:18", "remaining_time": "16:19:51"} +{"current_steps": 2170, "total_steps": 6748, "loss": 1.4319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8290945468373684e-05, "epoch": 0.64, "percentage": 32.16, "elapsed_time": "7:43:21", "remaining_time": "16:17:31"} +{"current_steps": 2180, "total_steps": 6748, "loss": 1.4035, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8192222934869e-05, "epoch": 0.65, "percentage": 32.31, "elapsed_time": "7:45:30", "remaining_time": "16:15:26"} +{"current_steps": 2190, "total_steps": 6748, "loss": 1.4161, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.809321446670909e-05, "epoch": 0.65, "percentage": 32.45, "elapsed_time": "7:47:40", "remaining_time": "16:13:21"} +{"current_steps": 2200, "total_steps": 6748, "loss": 1.4136, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.799392220985164e-05, "epoch": 0.65, "percentage": 32.6, "elapsed_time": "7:49:49", "remaining_time": "16:11:16"} +{"current_steps": 2210, "total_steps": 6748, "loss": 1.4188, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.789434831640533e-05, "epoch": 0.65, "percentage": 32.75, "elapsed_time": "7:51:58", "remaining_time": "16:09:09"} +{"current_steps": 2220, "total_steps": 6748, "loss": 1.4203, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.779449494458312e-05, "epoch": 0.66, "percentage": 32.9, "elapsed_time": "7:54:06", "remaining_time": "16:07:01"} +{"current_steps": 2230, "total_steps": 6748, "loss": 1.4263, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.769436425865557e-05, "epoch": 0.66, "percentage": 33.05, "elapsed_time": "7:56:11", "remaining_time": "16:04:46"} +{"current_steps": 2240, "total_steps": 6748, "loss": 1.4295, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.759395842890384e-05, "epoch": 0.66, "percentage": 33.2, "elapsed_time": "7:58:14", "remaining_time": "16:02:27"} +{"current_steps": 2250, "total_steps": 6748, "loss": 1.4144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.749327963157274e-05, "epoch": 0.67, "percentage": 33.34, "elapsed_time": "8:00:28", "remaining_time": "16:00:30"} +{"current_steps": 2260, "total_steps": 6748, "loss": 1.4162, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.739233004882346e-05, "epoch": 0.67, "percentage": 33.49, "elapsed_time": "8:02:40", "remaining_time": "15:58:30"} +{"current_steps": 2270, "total_steps": 6748, "loss": 1.4099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.729111186868635e-05, "epoch": 0.67, "percentage": 33.64, "elapsed_time": "8:04:47", "remaining_time": "15:56:21"} +{"current_steps": 2280, "total_steps": 6748, "loss": 1.3878, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.718962728501348e-05, "epoch": 0.68, "percentage": 33.79, "elapsed_time": "8:06:58", "remaining_time": "15:54:17"} +{"current_steps": 2290, "total_steps": 6748, "loss": 1.4399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.708787849743106e-05, "epoch": 0.68, "percentage": 33.94, "elapsed_time": "8:09:07", "remaining_time": "15:52:12"} +{"current_steps": 2300, "total_steps": 6748, "loss": 1.4249, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.69858677112918e-05, "epoch": 0.68, "percentage": 34.08, "elapsed_time": "8:11:12", "remaining_time": "15:49:57"} +{"current_steps": 2310, "total_steps": 6748, "loss": 1.3925, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.688359713762707e-05, "epoch": 0.68, "percentage": 34.23, "elapsed_time": "8:13:14", "remaining_time": "15:47:38"} +{"current_steps": 2320, "total_steps": 6748, "loss": 1.4036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6781068993099034e-05, "epoch": 0.69, "percentage": 34.38, "elapsed_time": "8:15:27", "remaining_time": "15:45:39"} +{"current_steps": 2330, "total_steps": 6748, "loss": 1.3986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.667828549995255e-05, "epoch": 0.69, "percentage": 34.53, "elapsed_time": "8:17:32", "remaining_time": "15:43:25"} +{"current_steps": 2340, "total_steps": 6748, "loss": 1.4298, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.657524888596703e-05, "epoch": 0.69, "percentage": 34.68, "elapsed_time": "8:19:43", "remaining_time": "15:41:20"} +{"current_steps": 2350, "total_steps": 6748, "loss": 1.4016, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6471961384408155e-05, "epoch": 0.7, "percentage": 34.83, "elapsed_time": "8:22:02", "remaining_time": "15:39:33"} +{"current_steps": 2360, "total_steps": 6748, "loss": 1.3992, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.636842523397945e-05, "epoch": 0.7, "percentage": 34.97, "elapsed_time": "8:24:13", "remaining_time": "15:37:30"} +{"current_steps": 2370, "total_steps": 6748, "loss": 1.4441, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.626464267877381e-05, "epoch": 0.7, "percentage": 35.12, "elapsed_time": "8:26:18", "remaining_time": "15:35:16"} +{"current_steps": 2380, "total_steps": 6748, "loss": 1.3967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.616061596822478e-05, "epoch": 0.71, "percentage": 35.27, "elapsed_time": "8:28:28", "remaining_time": "15:33:12"} +{"current_steps": 2390, "total_steps": 6748, "loss": 1.4252, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6056347357057893e-05, "epoch": 0.71, "percentage": 35.42, "elapsed_time": "8:30:35", "remaining_time": "15:31:02"} +{"current_steps": 2400, "total_steps": 6748, "loss": 1.4209, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.595183910524173e-05, "epoch": 0.71, "percentage": 35.57, "elapsed_time": "8:32:40", "remaining_time": "15:28:48"} +{"current_steps": 2410, "total_steps": 6748, "loss": 1.4133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5847093477938956e-05, "epoch": 0.71, "percentage": 35.71, "elapsed_time": "8:34:53", "remaining_time": "15:26:47"} +{"current_steps": 2420, "total_steps": 6748, "loss": 1.4313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5742112745457235e-05, "epoch": 0.72, "percentage": 35.86, "elapsed_time": "8:36:54", "remaining_time": "15:24:27"} +{"current_steps": 2430, "total_steps": 6748, "loss": 1.4275, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.563689918320002e-05, "epoch": 0.72, "percentage": 36.01, "elapsed_time": "8:39:05", "remaining_time": "15:22:24"} +{"current_steps": 2440, "total_steps": 6748, "loss": 1.421, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5531455071617226e-05, "epoch": 0.72, "percentage": 36.16, "elapsed_time": "8:41:11", "remaining_time": "15:20:11"} +{"current_steps": 2450, "total_steps": 6748, "loss": 1.4402, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.542578269615579e-05, "epoch": 0.73, "percentage": 36.31, "elapsed_time": "8:43:17", "remaining_time": "15:17:59"} +{"current_steps": 2460, "total_steps": 6748, "loss": 1.4176, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5319884347210186e-05, "epoch": 0.73, "percentage": 36.46, "elapsed_time": "8:45:21", "remaining_time": "15:15:44"} +{"current_steps": 2470, "total_steps": 6748, "loss": 1.4117, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.521376232007271e-05, "epoch": 0.73, "percentage": 36.6, "elapsed_time": "8:47:26", "remaining_time": "15:13:30"} +{"current_steps": 2480, "total_steps": 6748, "loss": 1.41, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5107418914883794e-05, "epoch": 0.73, "percentage": 36.75, "elapsed_time": "8:49:31", "remaining_time": "15:11:17"} +{"current_steps": 2490, "total_steps": 6748, "loss": 1.4313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.500085643658211e-05, "epoch": 0.74, "percentage": 36.9, "elapsed_time": "8:51:37", "remaining_time": "15:09:06"} +{"current_steps": 2500, "total_steps": 6748, "loss": 1.4035, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.489407719485464e-05, "epoch": 0.74, "percentage": 37.05, "elapsed_time": "8:53:49", "remaining_time": "15:07:05"} +{"current_steps": 2510, "total_steps": 6748, "loss": 1.4057, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4787083504086605e-05, "epoch": 0.74, "percentage": 37.2, "elapsed_time": "8:55:57", "remaining_time": "15:04:56"} +{"current_steps": 2520, "total_steps": 6748, "loss": 1.4125, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.467987768331127e-05, "epoch": 0.75, "percentage": 37.34, "elapsed_time": "8:58:02", "remaining_time": "15:02:42"} +{"current_steps": 2530, "total_steps": 6748, "loss": 1.4056, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.457246205615974e-05, "epoch": 0.75, "percentage": 37.49, "elapsed_time": "9:00:08", "remaining_time": "15:00:31"} +{"current_steps": 2540, "total_steps": 6748, "loss": 1.4082, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.446483895081054e-05, "epoch": 0.75, "percentage": 37.64, "elapsed_time": "9:02:19", "remaining_time": "14:58:28"} +{"current_steps": 2550, "total_steps": 6748, "loss": 1.3915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4357010699939215e-05, "epoch": 0.76, "percentage": 37.79, "elapsed_time": "9:04:22", "remaining_time": "14:56:11"} +{"current_steps": 2560, "total_steps": 6748, "loss": 1.4012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.424897964066769e-05, "epoch": 0.76, "percentage": 37.94, "elapsed_time": "9:06:35", "remaining_time": "14:54:10"} +{"current_steps": 2570, "total_steps": 6748, "loss": 1.4251, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4140748114513685e-05, "epoch": 0.76, "percentage": 38.09, "elapsed_time": "9:08:42", "remaining_time": "14:52:01"} +{"current_steps": 2580, "total_steps": 6748, "loss": 1.4013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.403231846733994e-05, "epoch": 0.76, "percentage": 38.23, "elapsed_time": "9:10:51", "remaining_time": "14:49:54"} +{"current_steps": 2590, "total_steps": 6748, "loss": 1.4076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.392369304930334e-05, "epoch": 0.77, "percentage": 38.38, "elapsed_time": "9:13:01", "remaining_time": "14:47:50"} +{"current_steps": 2600, "total_steps": 6748, "loss": 1.3978, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3814874214804034e-05, "epoch": 0.77, "percentage": 38.53, "elapsed_time": "9:15:07", "remaining_time": "14:45:38"} +{"current_steps": 2610, "total_steps": 6748, "loss": 1.408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3705864322434354e-05, "epoch": 0.77, "percentage": 38.68, "elapsed_time": "9:17:19", "remaining_time": "14:43:36"} +{"current_steps": 2620, "total_steps": 6748, "loss": 1.3888, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.359666573492772e-05, "epoch": 0.78, "percentage": 38.83, "elapsed_time": "9:19:31", "remaining_time": "14:41:34"} +{"current_steps": 2630, "total_steps": 6748, "loss": 1.4052, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3487280819107415e-05, "epoch": 0.78, "percentage": 38.97, "elapsed_time": "9:21:43", "remaining_time": "14:39:32"} +{"current_steps": 2640, "total_steps": 6748, "loss": 1.4286, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.33777119458353e-05, "epoch": 0.78, "percentage": 39.12, "elapsed_time": "9:23:47", "remaining_time": "14:37:17"} +{"current_steps": 2650, "total_steps": 6748, "loss": 1.4241, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.326796148996042e-05, "epoch": 0.79, "percentage": 39.27, "elapsed_time": "9:25:59", "remaining_time": "14:35:15"} +{"current_steps": 2660, "total_steps": 6748, "loss": 1.4049, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.315803183026753e-05, "epoch": 0.79, "percentage": 39.42, "elapsed_time": "9:28:07", "remaining_time": "14:33:06"} +{"current_steps": 2670, "total_steps": 6748, "loss": 1.3826, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.304792534942553e-05, "epoch": 0.79, "percentage": 39.57, "elapsed_time": "9:30:19", "remaining_time": "14:31:04"} +{"current_steps": 2680, "total_steps": 6748, "loss": 1.413, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.293764443393582e-05, "epoch": 0.79, "percentage": 39.72, "elapsed_time": "9:32:27", "remaining_time": "14:28:56"} +{"current_steps": 2690, "total_steps": 6748, "loss": 1.4161, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2827191474080605e-05, "epoch": 0.8, "percentage": 39.86, "elapsed_time": "9:34:35", "remaining_time": "14:26:48"} +{"current_steps": 2700, "total_steps": 6748, "loss": 1.382, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2716568863871044e-05, "epoch": 0.8, "percentage": 40.01, "elapsed_time": "9:36:38", "remaining_time": "14:24:32"} +{"current_steps": 2710, "total_steps": 6748, "loss": 1.381, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.260577900099539e-05, "epoch": 0.8, "percentage": 40.16, "elapsed_time": "9:38:46", "remaining_time": "14:22:23"} +{"current_steps": 2720, "total_steps": 6748, "loss": 1.396, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2494824286767e-05, "epoch": 0.81, "percentage": 40.31, "elapsed_time": "9:40:59", "remaining_time": "14:20:22"} +{"current_steps": 2730, "total_steps": 6748, "loss": 1.3923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2383707126072315e-05, "epoch": 0.81, "percentage": 40.46, "elapsed_time": "9:43:03", "remaining_time": "14:18:09"} +{"current_steps": 2740, "total_steps": 6748, "loss": 1.4044, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2272429927318707e-05, "epoch": 0.81, "percentage": 40.6, "elapsed_time": "9:45:06", "remaining_time": "14:15:52"} +{"current_steps": 2750, "total_steps": 6748, "loss": 1.4073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.21609951023823e-05, "epoch": 0.81, "percentage": 40.75, "elapsed_time": "9:47:10", "remaining_time": "14:13:38"} +{"current_steps": 2760, "total_steps": 6748, "loss": 1.4178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.204940506655568e-05, "epoch": 0.82, "percentage": 40.9, "elapsed_time": "9:49:16", "remaining_time": "14:11:27"} +{"current_steps": 2770, "total_steps": 6748, "loss": 1.4179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1937662238495544e-05, "epoch": 0.82, "percentage": 41.05, "elapsed_time": "9:51:26", "remaining_time": "14:09:22"} +{"current_steps": 2780, "total_steps": 6748, "loss": 1.4003, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1825769040170285e-05, "epoch": 0.82, "percentage": 41.2, "elapsed_time": "9:53:38", "remaining_time": "14:07:20"} +{"current_steps": 2790, "total_steps": 6748, "loss": 1.4176, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1713727896807505e-05, "epoch": 0.83, "percentage": 41.35, "elapsed_time": "9:55:42", "remaining_time": "14:05:05"} +{"current_steps": 2800, "total_steps": 6748, "loss": 1.4179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.160154123684143e-05, "epoch": 0.83, "percentage": 41.49, "elapsed_time": "9:57:51", "remaining_time": "14:02:58"} +{"current_steps": 2810, "total_steps": 6748, "loss": 1.4098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1489211491860276e-05, "epoch": 0.83, "percentage": 41.64, "elapsed_time": "10:00:02", "remaining_time": "14:00:54"} +{"current_steps": 2820, "total_steps": 6748, "loss": 1.4087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1376741096553576e-05, "epoch": 0.84, "percentage": 41.79, "elapsed_time": "10:02:04", "remaining_time": "13:58:38"} +{"current_steps": 2830, "total_steps": 6748, "loss": 1.3971, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.126413248865935e-05, "epoch": 0.84, "percentage": 41.94, "elapsed_time": "10:04:11", "remaining_time": "13:56:28"} +{"current_steps": 2840, "total_steps": 6748, "loss": 1.3915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.115138810891134e-05, "epoch": 0.84, "percentage": 42.09, "elapsed_time": "10:06:18", "remaining_time": "13:54:19"} +{"current_steps": 2850, "total_steps": 6748, "loss": 1.4041, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.103851040098607e-05, "epoch": 0.84, "percentage": 42.23, "elapsed_time": "10:08:28", "remaining_time": "13:52:14"} +{"current_steps": 2860, "total_steps": 6748, "loss": 1.4129, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0925501811449855e-05, "epoch": 0.85, "percentage": 42.38, "elapsed_time": "10:10:36", "remaining_time": "13:50:05"} +{"current_steps": 2870, "total_steps": 6748, "loss": 1.3948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.081236478970583e-05, "epoch": 0.85, "percentage": 42.53, "elapsed_time": "10:12:39", "remaining_time": "13:47:50"} +{"current_steps": 2880, "total_steps": 6748, "loss": 1.4116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.069910178794082e-05, "epoch": 0.85, "percentage": 42.68, "elapsed_time": "10:14:41", "remaining_time": "13:45:34"} +{"current_steps": 2890, "total_steps": 6748, "loss": 1.4029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0585715261072206e-05, "epoch": 0.86, "percentage": 42.83, "elapsed_time": "10:16:52", "remaining_time": "13:43:30"} +{"current_steps": 2900, "total_steps": 6748, "loss": 1.399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.04722076666947e-05, "epoch": 0.86, "percentage": 42.98, "elapsed_time": "10:18:58", "remaining_time": "13:41:18"} +{"current_steps": 2910, "total_steps": 6748, "loss": 1.4061, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0358581465027125e-05, "epoch": 0.86, "percentage": 43.12, "elapsed_time": "10:21:03", "remaining_time": "13:39:07"} +{"current_steps": 2920, "total_steps": 6748, "loss": 1.4152, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.024483911885901e-05, "epoch": 0.87, "percentage": 43.27, "elapsed_time": "10:23:05", "remaining_time": "13:36:50"} +{"current_steps": 2930, "total_steps": 6748, "loss": 1.4257, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.013098309349729e-05, "epoch": 0.87, "percentage": 43.42, "elapsed_time": "10:25:13", "remaining_time": "13:34:42"} +{"current_steps": 2940, "total_steps": 6748, "loss": 1.417, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0017015856712814e-05, "epoch": 0.87, "percentage": 43.57, "elapsed_time": "10:27:27", "remaining_time": "13:32:42"} +{"current_steps": 2950, "total_steps": 6748, "loss": 1.3952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9902939878686915e-05, "epoch": 0.87, "percentage": 43.72, "elapsed_time": "10:29:32", "remaining_time": "13:30:30"} +{"current_steps": 2960, "total_steps": 6748, "loss": 1.4252, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.978875763195779e-05, "epoch": 0.88, "percentage": 43.86, "elapsed_time": "10:31:42", "remaining_time": "13:28:24"} +{"current_steps": 2970, "total_steps": 6748, "loss": 1.3982, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9674471591367005e-05, "epoch": 0.88, "percentage": 44.01, "elapsed_time": "10:33:48", "remaining_time": "13:26:14"} +{"current_steps": 2980, "total_steps": 6748, "loss": 1.3948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9560084234005765e-05, "epoch": 0.88, "percentage": 44.16, "elapsed_time": "10:35:53", "remaining_time": "13:24:02"} +{"current_steps": 2990, "total_steps": 6748, "loss": 1.4127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.944559803916128e-05, "epoch": 0.89, "percentage": 44.31, "elapsed_time": "10:37:55", "remaining_time": "13:21:47"} +{"current_steps": 3000, "total_steps": 6748, "loss": 1.4239, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9331015488263024e-05, "epoch": 0.89, "percentage": 44.46, "elapsed_time": "10:40:00", "remaining_time": "13:19:35"} +{"current_steps": 3010, "total_steps": 6748, "loss": 1.3889, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9216339064828914e-05, "epoch": 0.89, "percentage": 44.61, "elapsed_time": "10:42:09", "remaining_time": "13:17:27"} +{"current_steps": 3020, "total_steps": 6748, "loss": 1.403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.910157125441152e-05, "epoch": 0.89, "percentage": 44.75, "elapsed_time": "10:44:13", "remaining_time": "13:15:14"} +{"current_steps": 3030, "total_steps": 6748, "loss": 1.4106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.898671454454418e-05, "epoch": 0.9, "percentage": 44.9, "elapsed_time": "10:46:22", "remaining_time": "13:13:09"} +{"current_steps": 3040, "total_steps": 6748, "loss": 1.4123, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8871771424687078e-05, "epoch": 0.9, "percentage": 45.05, "elapsed_time": "10:48:29", "remaining_time": "13:10:58"} +{"current_steps": 3050, "total_steps": 6748, "loss": 1.4137, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8756744386173284e-05, "epoch": 0.9, "percentage": 45.2, "elapsed_time": "10:50:33", "remaining_time": "13:08:46"} +{"current_steps": 3060, "total_steps": 6748, "loss": 1.4009, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8641635922154774e-05, "epoch": 0.91, "percentage": 45.35, "elapsed_time": "10:52:46", "remaining_time": "13:06:44"} +{"current_steps": 3070, "total_steps": 6748, "loss": 1.4159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8526448527548372e-05, "epoch": 0.91, "percentage": 45.49, "elapsed_time": "10:54:49", "remaining_time": "13:04:31"} +{"current_steps": 3080, "total_steps": 6748, "loss": 1.4071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8411184698981684e-05, "epoch": 0.91, "percentage": 45.64, "elapsed_time": "10:57:00", "remaining_time": "13:02:26"} +{"current_steps": 3090, "total_steps": 6748, "loss": 1.41, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.829584693473899e-05, "epoch": 0.92, "percentage": 45.79, "elapsed_time": "10:59:10", "remaining_time": "13:00:21"} +{"current_steps": 3100, "total_steps": 6748, "loss": 1.4038, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8180437734707064e-05, "epoch": 0.92, "percentage": 45.94, "elapsed_time": "11:01:18", "remaining_time": "12:58:12"} +{"current_steps": 3110, "total_steps": 6748, "loss": 1.4069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8064959600321043e-05, "epoch": 0.92, "percentage": 46.09, "elapsed_time": "11:03:26", "remaining_time": "12:56:04"} +{"current_steps": 3120, "total_steps": 6748, "loss": 1.4096, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7949415034510163e-05, "epoch": 0.92, "percentage": 46.24, "elapsed_time": "11:05:38", "remaining_time": "12:54:01"} +{"current_steps": 3130, "total_steps": 6748, "loss": 1.3821, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7833806541643544e-05, "epoch": 0.93, "percentage": 46.38, "elapsed_time": "11:07:45", "remaining_time": "12:51:52"} +{"current_steps": 3140, "total_steps": 6748, "loss": 1.3886, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7718136627475865e-05, "epoch": 0.93, "percentage": 46.53, "elapsed_time": "11:09:52", "remaining_time": "12:49:43"} +{"current_steps": 3150, "total_steps": 6748, "loss": 1.3923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.76024077990931e-05, "epoch": 0.93, "percentage": 46.68, "elapsed_time": "11:12:00", "remaining_time": "12:47:34"} +{"current_steps": 3160, "total_steps": 6748, "loss": 1.4072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.748662256485816e-05, "epoch": 0.94, "percentage": 46.83, "elapsed_time": "11:14:04", "remaining_time": "12:45:22"} +{"current_steps": 3170, "total_steps": 6748, "loss": 1.4126, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7370783434356512e-05, "epoch": 0.94, "percentage": 46.98, "elapsed_time": "11:16:11", "remaining_time": "12:43:12"} +{"current_steps": 3180, "total_steps": 6748, "loss": 1.4238, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7254892918341802e-05, "epoch": 0.94, "percentage": 47.13, "elapsed_time": "11:18:15", "remaining_time": "12:41:01"} +{"current_steps": 3190, "total_steps": 6748, "loss": 1.4183, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.713895352868144e-05, "epoch": 0.95, "percentage": 47.27, "elapsed_time": "11:20:23", "remaining_time": "12:38:52"} +{"current_steps": 3200, "total_steps": 6748, "loss": 1.4056, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.702296777830212e-05, "epoch": 0.95, "percentage": 47.42, "elapsed_time": "11:22:29", "remaining_time": "12:36:42"} +{"current_steps": 3210, "total_steps": 6748, "loss": 1.4096, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6906938181135423e-05, "epoch": 0.95, "percentage": 47.57, "elapsed_time": "11:24:32", "remaining_time": "12:34:29"} +{"current_steps": 3220, "total_steps": 6748, "loss": 1.4018, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6790867252063247e-05, "epoch": 0.95, "percentage": 47.72, "elapsed_time": "11:26:39", "remaining_time": "12:32:19"} +{"current_steps": 3230, "total_steps": 6748, "loss": 1.3922, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6674757506863357e-05, "epoch": 0.96, "percentage": 47.87, "elapsed_time": "11:28:47", "remaining_time": "12:30:12"} +{"current_steps": 3240, "total_steps": 6748, "loss": 1.4054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.655861146215483e-05, "epoch": 0.96, "percentage": 48.01, "elapsed_time": "11:30:54", "remaining_time": "12:28:03"} +{"current_steps": 3250, "total_steps": 6748, "loss": 1.3914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6442431635343528e-05, "epoch": 0.96, "percentage": 48.16, "elapsed_time": "11:33:08", "remaining_time": "12:26:02"} +{"current_steps": 3260, "total_steps": 6748, "loss": 1.3851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6326220544567514e-05, "epoch": 0.97, "percentage": 48.31, "elapsed_time": "11:35:17", "remaining_time": "12:23:55"} +{"current_steps": 3270, "total_steps": 6748, "loss": 1.4102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.620998070864248e-05, "epoch": 0.97, "percentage": 48.46, "elapsed_time": "11:37:25", "remaining_time": "12:21:47"} +{"current_steps": 3280, "total_steps": 6748, "loss": 1.4069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6093714647007156e-05, "epoch": 0.97, "percentage": 48.61, "elapsed_time": "11:39:33", "remaining_time": "12:19:38"} +{"current_steps": 3290, "total_steps": 6748, "loss": 1.3919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5977424879668705e-05, "epoch": 0.97, "percentage": 48.76, "elapsed_time": "11:41:41", "remaining_time": "12:17:30"} +{"current_steps": 3300, "total_steps": 6748, "loss": 1.4073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5861113927148096e-05, "epoch": 0.98, "percentage": 48.9, "elapsed_time": "11:43:46", "remaining_time": "12:15:20"} +{"current_steps": 3310, "total_steps": 6748, "loss": 1.4025, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5744784310425467e-05, "epoch": 0.98, "percentage": 49.05, "elapsed_time": "11:45:57", "remaining_time": "12:13:15"} +{"current_steps": 3320, "total_steps": 6748, "loss": 1.3805, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.562843855088551e-05, "epoch": 0.98, "percentage": 49.2, "elapsed_time": "11:48:05", "remaining_time": "12:11:07"} +{"current_steps": 3330, "total_steps": 6748, "loss": 1.4032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5512079170262793e-05, "epoch": 0.99, "percentage": 49.35, "elapsed_time": "11:50:12", "remaining_time": "12:08:58"} +{"current_steps": 3340, "total_steps": 6748, "loss": 1.4232, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5395708690587117e-05, "epoch": 0.99, "percentage": 49.5, "elapsed_time": "11:52:19", "remaining_time": "12:06:50"} +{"current_steps": 3350, "total_steps": 6748, "loss": 1.3897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.527932963412885e-05, "epoch": 0.99, "percentage": 49.64, "elapsed_time": "11:54:25", "remaining_time": "12:04:39"} +{"current_steps": 3360, "total_steps": 6748, "loss": 1.4008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5162944523344256e-05, "epoch": 1.0, "percentage": 49.79, "elapsed_time": "11:56:34", "remaining_time": "12:02:33"} +{"current_steps": 3370, "total_steps": 6748, "loss": 1.3936, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5046555880820826e-05, "epoch": 1.0, "percentage": 49.94, "elapsed_time": "11:58:42", "remaining_time": "12:00:24"} +{"current_steps": 3380, "total_steps": 6748, "loss": 1.394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4930166229222597e-05, "epoch": 1.0, "percentage": 50.09, "elapsed_time": "12:00:48", "remaining_time": "11:58:14"} +{"current_steps": 3390, "total_steps": 6748, "loss": 1.3903, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.481377809123547e-05, "epoch": 1.0, "percentage": 50.24, "elapsed_time": "12:02:55", "remaining_time": "11:56:05"} +{"current_steps": 3400, "total_steps": 6748, "loss": 1.3869, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.469739398951256e-05, "epoch": 1.01, "percentage": 50.39, "elapsed_time": "12:05:02", "remaining_time": "11:53:56"} +{"current_steps": 3410, "total_steps": 6748, "loss": 1.429, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.458101644661947e-05, "epoch": 1.01, "percentage": 50.53, "elapsed_time": "12:07:06", "remaining_time": "11:51:45"} +{"current_steps": 3420, "total_steps": 6748, "loss": 1.3987, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4464647984979667e-05, "epoch": 1.01, "percentage": 50.68, "elapsed_time": "12:09:20", "remaining_time": "11:49:43"} +{"current_steps": 3430, "total_steps": 6748, "loss": 1.38, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4348291126819783e-05, "epoch": 1.02, "percentage": 50.83, "elapsed_time": "12:11:29", "remaining_time": "11:47:36"} +{"current_steps": 3440, "total_steps": 6748, "loss": 1.3906, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4231948394114936e-05, "epoch": 1.02, "percentage": 50.98, "elapsed_time": "12:13:37", "remaining_time": "11:45:28"} +{"current_steps": 3450, "total_steps": 6748, "loss": 1.3931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4115622308534096e-05, "epoch": 1.02, "percentage": 51.13, "elapsed_time": "12:15:51", "remaining_time": "11:43:26"} +{"current_steps": 3460, "total_steps": 6748, "loss": 1.4135, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.399931539138541e-05, "epoch": 1.03, "percentage": 51.27, "elapsed_time": "12:17:57", "remaining_time": "11:41:16"} +{"current_steps": 3470, "total_steps": 6748, "loss": 1.3952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.388303016356156e-05, "epoch": 1.03, "percentage": 51.42, "elapsed_time": "12:20:03", "remaining_time": "11:39:06"} +{"current_steps": 3480, "total_steps": 6748, "loss": 1.3972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3766769145485125e-05, "epoch": 1.03, "percentage": 51.57, "elapsed_time": "12:22:13", "remaining_time": "11:37:00"} +{"current_steps": 3490, "total_steps": 6748, "loss": 1.3937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3650534857053943e-05, "epoch": 1.03, "percentage": 51.72, "elapsed_time": "12:24:23", "remaining_time": "11:34:54"} +{"current_steps": 3500, "total_steps": 6748, "loss": 1.3936, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3534329817586513e-05, "epoch": 1.04, "percentage": 51.87, "elapsed_time": "12:26:29", "remaining_time": "11:32:45"} +{"current_steps": 3510, "total_steps": 6748, "loss": 1.397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3418156545767365e-05, "epoch": 1.04, "percentage": 52.02, "elapsed_time": "12:28:37", "remaining_time": "11:30:36"} +{"current_steps": 3520, "total_steps": 6748, "loss": 1.3849, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3302017559592494e-05, "epoch": 1.04, "percentage": 52.16, "elapsed_time": "12:30:45", "remaining_time": "11:28:29"} +{"current_steps": 3530, "total_steps": 6748, "loss": 1.4118, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.318591537631476e-05, "epoch": 1.05, "percentage": 52.31, "elapsed_time": "12:32:52", "remaining_time": "11:26:19"} +{"current_steps": 3540, "total_steps": 6748, "loss": 1.414, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3069852512389335e-05, "epoch": 1.05, "percentage": 52.46, "elapsed_time": "12:34:51", "remaining_time": "11:24:03"} +{"current_steps": 3550, "total_steps": 6748, "loss": 1.4088, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2953831483419184e-05, "epoch": 1.05, "percentage": 52.61, "elapsed_time": "12:37:01", "remaining_time": "11:21:57"} +{"current_steps": 3560, "total_steps": 6748, "loss": 1.3773, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2837854804100504e-05, "epoch": 1.05, "percentage": 52.76, "elapsed_time": "12:39:08", "remaining_time": "11:19:49"} +{"current_steps": 3570, "total_steps": 6748, "loss": 1.3977, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.272192498816825e-05, "epoch": 1.06, "percentage": 52.9, "elapsed_time": "12:41:15", "remaining_time": "11:17:40"} +{"current_steps": 3580, "total_steps": 6748, "loss": 1.3591, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.260604454834162e-05, "epoch": 1.06, "percentage": 53.05, "elapsed_time": "12:43:19", "remaining_time": "11:15:29"} +{"current_steps": 3590, "total_steps": 6748, "loss": 1.4023, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2490215996269617e-05, "epoch": 1.06, "percentage": 53.2, "elapsed_time": "12:45:31", "remaining_time": "11:13:24"} +{"current_steps": 3600, "total_steps": 6748, "loss": 1.3873, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.237444184247661e-05, "epoch": 1.07, "percentage": 53.35, "elapsed_time": "12:47:33", "remaining_time": "11:11:11"} +{"current_steps": 3610, "total_steps": 6748, "loss": 1.3826, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2258724596307915e-05, "epoch": 1.07, "percentage": 53.5, "elapsed_time": "12:49:38", "remaining_time": "11:09:00"} +{"current_steps": 3620, "total_steps": 6748, "loss": 1.3732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.214306676587539e-05, "epoch": 1.07, "percentage": 53.65, "elapsed_time": "12:51:45", "remaining_time": "11:06:51"} +{"current_steps": 3630, "total_steps": 6748, "loss": 1.3988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2027470858003098e-05, "epoch": 1.08, "percentage": 53.79, "elapsed_time": "12:53:53", "remaining_time": "11:04:44"} +{"current_steps": 3640, "total_steps": 6748, "loss": 1.4036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1911939378172956e-05, "epoch": 1.08, "percentage": 53.94, "elapsed_time": "12:55:59", "remaining_time": "11:02:34"} +{"current_steps": 3650, "total_steps": 6748, "loss": 1.4236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1796474830470447e-05, "epoch": 1.08, "percentage": 54.09, "elapsed_time": "12:58:05", "remaining_time": "11:00:25"} +{"current_steps": 3660, "total_steps": 6748, "loss": 1.4032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1681079717530328e-05, "epoch": 1.08, "percentage": 54.24, "elapsed_time": "13:00:10", "remaining_time": "10:58:14"} +{"current_steps": 3670, "total_steps": 6748, "loss": 1.39, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.156575654048239e-05, "epoch": 1.09, "percentage": 54.39, "elapsed_time": "13:02:17", "remaining_time": "10:56:05"} +{"current_steps": 3680, "total_steps": 6748, "loss": 1.3757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.145050779889725e-05, "epoch": 1.09, "percentage": 54.53, "elapsed_time": "13:04:23", "remaining_time": "10:53:56"} +{"current_steps": 3690, "total_steps": 6748, "loss": 1.3934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1335335990732186e-05, "epoch": 1.09, "percentage": 54.68, "elapsed_time": "13:06:27", "remaining_time": "10:51:45"} +{"current_steps": 3700, "total_steps": 6748, "loss": 1.3979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1220243612276964e-05, "epoch": 1.1, "percentage": 54.83, "elapsed_time": "13:08:31", "remaining_time": "10:49:34"} +{"current_steps": 3710, "total_steps": 6748, "loss": 1.4181, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.110523315809978e-05, "epoch": 1.1, "percentage": 54.98, "elapsed_time": "13:10:36", "remaining_time": "10:47:24"} +{"current_steps": 3720, "total_steps": 6748, "loss": 1.406, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0990307120993134e-05, "epoch": 1.1, "percentage": 55.13, "elapsed_time": "13:12:44", "remaining_time": "10:45:16"} +{"current_steps": 3730, "total_steps": 6748, "loss": 1.4036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0875467991919854e-05, "epoch": 1.11, "percentage": 55.28, "elapsed_time": "13:14:49", "remaining_time": "10:43:06"} +{"current_steps": 3740, "total_steps": 6748, "loss": 1.4095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.076071825995906e-05, "epoch": 1.11, "percentage": 55.42, "elapsed_time": "13:16:53", "remaining_time": "10:40:55"} +{"current_steps": 3750, "total_steps": 6748, "loss": 1.4048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0646060412252246e-05, "epoch": 1.11, "percentage": 55.57, "elapsed_time": "13:19:00", "remaining_time": "10:38:47"} +{"current_steps": 3760, "total_steps": 6748, "loss": 1.3874, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0531496933949363e-05, "epoch": 1.11, "percentage": 55.72, "elapsed_time": "13:21:07", "remaining_time": "10:36:38"} +{"current_steps": 3770, "total_steps": 6748, "loss": 1.3793, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0417030308154953e-05, "epoch": 1.12, "percentage": 55.87, "elapsed_time": "13:23:17", "remaining_time": "10:34:31"} +{"current_steps": 3780, "total_steps": 6748, "loss": 1.4152, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0302663015874322e-05, "epoch": 1.12, "percentage": 56.02, "elapsed_time": "13:25:20", "remaining_time": "10:32:20"} +{"current_steps": 3790, "total_steps": 6748, "loss": 1.3738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0188397535959785e-05, "epoch": 1.12, "percentage": 56.16, "elapsed_time": "13:27:32", "remaining_time": "10:30:15"} +{"current_steps": 3800, "total_steps": 6748, "loss": 1.4033, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.007423634505692e-05, "epoch": 1.13, "percentage": 56.31, "elapsed_time": "13:29:35", "remaining_time": "10:28:04"} +{"current_steps": 3810, "total_steps": 6748, "loss": 1.3753, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9960181917550897e-05, "epoch": 1.13, "percentage": 56.46, "elapsed_time": "13:31:45", "remaining_time": "10:25:58"} +{"current_steps": 3820, "total_steps": 6748, "loss": 1.3791, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9846236725512835e-05, "epoch": 1.13, "percentage": 56.61, "elapsed_time": "13:33:55", "remaining_time": "10:23:52"} +{"current_steps": 3830, "total_steps": 6748, "loss": 1.3837, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.973240323864624e-05, "epoch": 1.13, "percentage": 56.76, "elapsed_time": "13:36:03", "remaining_time": "10:21:44"} +{"current_steps": 3840, "total_steps": 6748, "loss": 1.3945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9618683924233467e-05, "epoch": 1.14, "percentage": 56.91, "elapsed_time": "13:38:10", "remaining_time": "10:19:35"} +{"current_steps": 3850, "total_steps": 6748, "loss": 1.3804, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9505081247082237e-05, "epoch": 1.14, "percentage": 57.05, "elapsed_time": "13:40:18", "remaining_time": "10:17:28"} +{"current_steps": 3860, "total_steps": 6748, "loss": 1.3964, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9391597669472213e-05, "epoch": 1.14, "percentage": 57.2, "elapsed_time": "13:42:23", "remaining_time": "10:15:18"} +{"current_steps": 3870, "total_steps": 6748, "loss": 1.3983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.927823565110165e-05, "epoch": 1.15, "percentage": 57.35, "elapsed_time": "13:44:26", "remaining_time": "10:13:06"} +{"current_steps": 3880, "total_steps": 6748, "loss": 1.4169, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9164997649034058e-05, "epoch": 1.15, "percentage": 57.5, "elapsed_time": "13:46:29", "remaining_time": "10:10:55"} +{"current_steps": 3890, "total_steps": 6748, "loss": 1.4101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9051886117644963e-05, "epoch": 1.15, "percentage": 57.65, "elapsed_time": "13:48:38", "remaining_time": "10:08:48"} +{"current_steps": 3900, "total_steps": 6748, "loss": 1.3823, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.89389035085687e-05, "epoch": 1.16, "percentage": 57.79, "elapsed_time": "13:50:42", "remaining_time": "10:06:37"} +{"current_steps": 3910, "total_steps": 6748, "loss": 1.3827, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8826052270645276e-05, "epoch": 1.16, "percentage": 57.94, "elapsed_time": "13:52:49", "remaining_time": "10:04:29"} +{"current_steps": 3920, "total_steps": 6748, "loss": 1.4035, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8713334849867315e-05, "epoch": 1.16, "percentage": 58.09, "elapsed_time": "13:55:02", "remaining_time": "10:02:25"} +{"current_steps": 3930, "total_steps": 6748, "loss": 1.4081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8600753689327e-05, "epoch": 1.16, "percentage": 58.24, "elapsed_time": "13:57:02", "remaining_time": "10:00:12"} +{"current_steps": 3940, "total_steps": 6748, "loss": 1.3919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8488311229163152e-05, "epoch": 1.17, "percentage": 58.39, "elapsed_time": "13:59:09", "remaining_time": "9:58:03"} +{"current_steps": 3950, "total_steps": 6748, "loss": 1.3854, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8376009906508338e-05, "epoch": 1.17, "percentage": 58.54, "elapsed_time": "14:01:15", "remaining_time": "9:55:54"} +{"current_steps": 3960, "total_steps": 6748, "loss": 1.3924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.826385215543603e-05, "epoch": 1.17, "percentage": 58.68, "elapsed_time": "14:03:32", "remaining_time": "9:53:53"} +{"current_steps": 3970, "total_steps": 6748, "loss": 1.3851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8151840406907873e-05, "epoch": 1.18, "percentage": 58.83, "elapsed_time": "14:05:38", "remaining_time": "9:51:44"} +{"current_steps": 3980, "total_steps": 6748, "loss": 1.3707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8039977088720972e-05, "epoch": 1.18, "percentage": 58.98, "elapsed_time": "14:07:50", "remaining_time": "9:49:38"} +{"current_steps": 3990, "total_steps": 6748, "loss": 1.3998, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7928264625455282e-05, "epoch": 1.18, "percentage": 59.13, "elapsed_time": "14:09:55", "remaining_time": "9:47:29"} +{"current_steps": 4000, "total_steps": 6748, "loss": 1.3931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7816705438421064e-05, "epoch": 1.19, "percentage": 59.28, "elapsed_time": "14:12:05", "remaining_time": "9:45:22"} +{"current_steps": 4010, "total_steps": 6748, "loss": 1.3976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7705301945606384e-05, "epoch": 1.19, "percentage": 59.43, "elapsed_time": "14:14:11", "remaining_time": "9:43:14"} +{"current_steps": 4020, "total_steps": 6748, "loss": 1.3785, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7594056561624716e-05, "epoch": 1.19, "percentage": 59.57, "elapsed_time": "14:16:22", "remaining_time": "9:41:08"} +{"current_steps": 4030, "total_steps": 6748, "loss": 1.3845, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.748297169766262e-05, "epoch": 1.19, "percentage": 59.72, "elapsed_time": "14:18:31", "remaining_time": "9:39:01"} +{"current_steps": 4040, "total_steps": 6748, "loss": 1.3926, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7372049761427457e-05, "epoch": 1.2, "percentage": 59.87, "elapsed_time": "14:20:38", "remaining_time": "9:36:52"} +{"current_steps": 4050, "total_steps": 6748, "loss": 1.4075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7261293157095204e-05, "epoch": 1.2, "percentage": 60.02, "elapsed_time": "14:22:49", "remaining_time": "9:34:47"} +{"current_steps": 4060, "total_steps": 6748, "loss": 1.3938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7150704285258375e-05, "epoch": 1.2, "percentage": 60.17, "elapsed_time": "14:24:53", "remaining_time": "9:32:37"} +{"current_steps": 4070, "total_steps": 6748, "loss": 1.3884, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7040285542873945e-05, "epoch": 1.21, "percentage": 60.31, "elapsed_time": "14:26:57", "remaining_time": "9:30:26"} +{"current_steps": 4080, "total_steps": 6748, "loss": 1.4066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6930039323211448e-05, "epoch": 1.21, "percentage": 60.46, "elapsed_time": "14:29:05", "remaining_time": "9:28:19"} +{"current_steps": 4090, "total_steps": 6748, "loss": 1.3992, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6819968015801048e-05, "epoch": 1.21, "percentage": 60.61, "elapsed_time": "14:31:19", "remaining_time": "9:26:14"} +{"current_steps": 4100, "total_steps": 6748, "loss": 1.4066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6710074006381797e-05, "epoch": 1.21, "percentage": 60.76, "elapsed_time": "14:33:23", "remaining_time": "9:24:04"} +{"current_steps": 4110, "total_steps": 6748, "loss": 1.4076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6600359676849892e-05, "epoch": 1.22, "percentage": 60.91, "elapsed_time": "14:35:25", "remaining_time": "9:21:53"} +{"current_steps": 4120, "total_steps": 6748, "loss": 1.4078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6490827405207062e-05, "epoch": 1.22, "percentage": 61.06, "elapsed_time": "14:37:34", "remaining_time": "9:19:46"} +{"current_steps": 4130, "total_steps": 6748, "loss": 1.4026, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.638147956550904e-05, "epoch": 1.22, "percentage": 61.2, "elapsed_time": "14:39:46", "remaining_time": "9:17:41"} +{"current_steps": 4140, "total_steps": 6748, "loss": 1.3861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.627231852781407e-05, "epoch": 1.23, "percentage": 61.35, "elapsed_time": "14:41:53", "remaining_time": "9:15:32"} +{"current_steps": 4150, "total_steps": 6748, "loss": 1.3915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6163346658131567e-05, "epoch": 1.23, "percentage": 61.5, "elapsed_time": "14:44:05", "remaining_time": "9:13:27"} +{"current_steps": 4160, "total_steps": 6748, "loss": 1.3828, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6054566318370832e-05, "epoch": 1.23, "percentage": 61.65, "elapsed_time": "14:46:17", "remaining_time": "9:11:22"} +{"current_steps": 4170, "total_steps": 6748, "loss": 1.3952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5945979866289844e-05, "epoch": 1.24, "percentage": 61.8, "elapsed_time": "14:48:22", "remaining_time": "9:09:13"} +{"current_steps": 4180, "total_steps": 6748, "loss": 1.3892, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.583758965544417e-05, "epoch": 1.24, "percentage": 61.94, "elapsed_time": "14:50:30", "remaining_time": "9:07:05"} +{"current_steps": 4190, "total_steps": 6748, "loss": 1.3973, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5729398035135957e-05, "epoch": 1.24, "percentage": 62.09, "elapsed_time": "14:52:37", "remaining_time": "9:04:56"} +{"current_steps": 4200, "total_steps": 6748, "loss": 1.4225, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5621407350362986e-05, "epoch": 1.24, "percentage": 62.24, "elapsed_time": "14:54:44", "remaining_time": "9:02:48"} +{"current_steps": 4210, "total_steps": 6748, "loss": 1.3948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5513619941767886e-05, "epoch": 1.25, "percentage": 62.39, "elapsed_time": "14:56:46", "remaining_time": "9:00:37"} +{"current_steps": 4220, "total_steps": 6748, "loss": 1.4074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.540603814558736e-05, "epoch": 1.25, "percentage": 62.54, "elapsed_time": "14:58:56", "remaining_time": "8:58:30"} +{"current_steps": 4230, "total_steps": 6748, "loss": 1.3965, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5298664293601574e-05, "epoch": 1.25, "percentage": 62.69, "elapsed_time": "15:01:02", "remaining_time": "8:56:21"} +{"current_steps": 4240, "total_steps": 6748, "loss": 1.3743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5191500713083615e-05, "epoch": 1.26, "percentage": 62.83, "elapsed_time": "15:03:16", "remaining_time": "8:54:17"} +{"current_steps": 4250, "total_steps": 6748, "loss": 1.384, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.508454972674904e-05, "epoch": 1.26, "percentage": 62.98, "elapsed_time": "15:05:23", "remaining_time": "8:52:09"} +{"current_steps": 4260, "total_steps": 6748, "loss": 1.4018, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4977813652705535e-05, "epoch": 1.26, "percentage": 63.13, "elapsed_time": "15:07:20", "remaining_time": "8:49:55"} +{"current_steps": 4270, "total_steps": 6748, "loss": 1.3904, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4871294804402675e-05, "epoch": 1.27, "percentage": 63.28, "elapsed_time": "15:09:31", "remaining_time": "8:47:49"} +{"current_steps": 4280, "total_steps": 6748, "loss": 1.3981, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4764995490581779e-05, "epoch": 1.27, "percentage": 63.43, "elapsed_time": "15:11:36", "remaining_time": "8:45:40"} +{"current_steps": 4290, "total_steps": 6748, "loss": 1.4144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.465891801522587e-05, "epoch": 1.27, "percentage": 63.57, "elapsed_time": "15:13:43", "remaining_time": "8:43:31"} +{"current_steps": 4300, "total_steps": 6748, "loss": 1.4172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4553064677509731e-05, "epoch": 1.27, "percentage": 63.72, "elapsed_time": "15:15:51", "remaining_time": "8:41:23"} +{"current_steps": 4310, "total_steps": 6748, "loss": 1.3873, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4447437771750078e-05, "epoch": 1.28, "percentage": 63.87, "elapsed_time": "15:17:59", "remaining_time": "8:39:16"} +{"current_steps": 4320, "total_steps": 6748, "loss": 1.3983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4342039587355832e-05, "epoch": 1.28, "percentage": 64.02, "elapsed_time": "15:20:05", "remaining_time": "8:37:07"} +{"current_steps": 4330, "total_steps": 6748, "loss": 1.4007, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.423687240877849e-05, "epoch": 1.28, "percentage": 64.17, "elapsed_time": "15:22:14", "remaining_time": "8:35:00"} +{"current_steps": 4340, "total_steps": 6748, "loss": 1.4088, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4131938515462639e-05, "epoch": 1.29, "percentage": 64.32, "elapsed_time": "15:24:23", "remaining_time": "8:32:53"} +{"current_steps": 4350, "total_steps": 6748, "loss": 1.3941, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4027240181796508e-05, "epoch": 1.29, "percentage": 64.46, "elapsed_time": "15:26:33", "remaining_time": "8:30:46"} +{"current_steps": 4360, "total_steps": 6748, "loss": 1.3975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3922779677062689e-05, "epoch": 1.29, "percentage": 64.61, "elapsed_time": "15:28:46", "remaining_time": "8:28:41"} +{"current_steps": 4370, "total_steps": 6748, "loss": 1.3842, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3818559265388964e-05, "epoch": 1.29, "percentage": 64.76, "elapsed_time": "15:30:53", "remaining_time": "8:26:33"} +{"current_steps": 4380, "total_steps": 6748, "loss": 1.4011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3714581205699214e-05, "epoch": 1.3, "percentage": 64.91, "elapsed_time": "15:32:59", "remaining_time": "8:24:24"} +{"current_steps": 4390, "total_steps": 6748, "loss": 1.3881, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3610847751664473e-05, "epoch": 1.3, "percentage": 65.06, "elapsed_time": "15:35:10", "remaining_time": "8:22:18"} +{"current_steps": 4400, "total_steps": 6748, "loss": 1.4028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3507361151654067e-05, "epoch": 1.3, "percentage": 65.2, "elapsed_time": "15:37:11", "remaining_time": "8:20:07"} +{"current_steps": 4410, "total_steps": 6748, "loss": 1.3973, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.340412364868689e-05, "epoch": 1.31, "percentage": 65.35, "elapsed_time": "15:39:20", "remaining_time": "8:17:59"} +{"current_steps": 4420, "total_steps": 6748, "loss": 1.445, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3301137480382786e-05, "epoch": 1.31, "percentage": 65.5, "elapsed_time": "15:41:26", "remaining_time": "8:15:51"} +{"current_steps": 4430, "total_steps": 6748, "loss": 1.3957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3198404878914044e-05, "epoch": 1.31, "percentage": 65.65, "elapsed_time": "15:43:35", "remaining_time": "8:13:44"} +{"current_steps": 4440, "total_steps": 6748, "loss": 1.395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3095928070957037e-05, "epoch": 1.32, "percentage": 65.8, "elapsed_time": "15:45:45", "remaining_time": "8:11:37"} +{"current_steps": 4450, "total_steps": 6748, "loss": 1.4157, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2993709277643922e-05, "epoch": 1.32, "percentage": 65.95, "elapsed_time": "15:47:53", "remaining_time": "8:09:29"} +{"current_steps": 4460, "total_steps": 6748, "loss": 1.4074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2891750714514545e-05, "epoch": 1.32, "percentage": 66.09, "elapsed_time": "15:50:05", "remaining_time": "8:07:24"} +{"current_steps": 4470, "total_steps": 6748, "loss": 1.3988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2790054591468381e-05, "epoch": 1.32, "percentage": 66.24, "elapsed_time": "15:52:16", "remaining_time": "8:05:17"} +{"current_steps": 4480, "total_steps": 6748, "loss": 1.3914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2688623112716652e-05, "epoch": 1.33, "percentage": 66.39, "elapsed_time": "15:54:26", "remaining_time": "8:03:11"} +{"current_steps": 4490, "total_steps": 6748, "loss": 1.3864, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2587458476734559e-05, "epoch": 1.33, "percentage": 66.54, "elapsed_time": "15:56:36", "remaining_time": "8:01:04"} +{"current_steps": 4500, "total_steps": 6748, "loss": 1.3934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.248656287621362e-05, "epoch": 1.33, "percentage": 66.69, "elapsed_time": "15:58:45", "remaining_time": "7:58:57"} +{"current_steps": 4510, "total_steps": 6748, "loss": 1.3893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2385938498014138e-05, "epoch": 1.34, "percentage": 66.83, "elapsed_time": "16:00:56", "remaining_time": "7:56:50"} +{"current_steps": 4520, "total_steps": 6748, "loss": 1.3991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2285587523117825e-05, "epoch": 1.34, "percentage": 66.98, "elapsed_time": "16:03:00", "remaining_time": "7:54:41"} +{"current_steps": 4530, "total_steps": 6748, "loss": 1.376, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2185512126580512e-05, "epoch": 1.34, "percentage": 67.13, "elapsed_time": "16:05:10", "remaining_time": "7:52:34"} +{"current_steps": 4540, "total_steps": 6748, "loss": 1.3799, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2085714477484997e-05, "epoch": 1.35, "percentage": 67.28, "elapsed_time": "16:07:15", "remaining_time": "7:50:25"} +{"current_steps": 4550, "total_steps": 6748, "loss": 1.3738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1986196738894078e-05, "epoch": 1.35, "percentage": 67.43, "elapsed_time": "16:09:25", "remaining_time": "7:48:18"} +{"current_steps": 4560, "total_steps": 6748, "loss": 1.3754, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.188696106780361e-05, "epoch": 1.35, "percentage": 67.58, "elapsed_time": "16:11:33", "remaining_time": "7:46:10"} +{"current_steps": 4570, "total_steps": 6748, "loss": 1.4006, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.178800961509578e-05, "epoch": 1.35, "percentage": 67.72, "elapsed_time": "16:13:43", "remaining_time": "7:44:04"} +{"current_steps": 4580, "total_steps": 6748, "loss": 1.4012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1689344525492497e-05, "epoch": 1.36, "percentage": 67.87, "elapsed_time": "16:15:58", "remaining_time": "7:41:59"} +{"current_steps": 4590, "total_steps": 6748, "loss": 1.3973, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1590967937508895e-05, "epoch": 1.36, "percentage": 68.02, "elapsed_time": "16:18:12", "remaining_time": "7:39:54"} +{"current_steps": 4600, "total_steps": 6748, "loss": 1.3737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.149288198340698e-05, "epoch": 1.36, "percentage": 68.17, "elapsed_time": "16:20:17", "remaining_time": "7:37:45"} +{"current_steps": 4610, "total_steps": 6748, "loss": 1.3998, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1395088789149419e-05, "epoch": 1.37, "percentage": 68.32, "elapsed_time": "16:22:29", "remaining_time": "7:35:39"} +{"current_steps": 4620, "total_steps": 6748, "loss": 1.4053, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1297590474353464e-05, "epoch": 1.37, "percentage": 68.46, "elapsed_time": "16:24:30", "remaining_time": "7:33:28"} +{"current_steps": 4630, "total_steps": 6748, "loss": 1.4038, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1200389152245003e-05, "epoch": 1.37, "percentage": 68.61, "elapsed_time": "16:26:31", "remaining_time": "7:31:17"} +{"current_steps": 4640, "total_steps": 6748, "loss": 1.3968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1103486929612759e-05, "epoch": 1.37, "percentage": 68.76, "elapsed_time": "16:28:33", "remaining_time": "7:29:06"} +{"current_steps": 4650, "total_steps": 6748, "loss": 1.4037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1006885906762626e-05, "epoch": 1.38, "percentage": 68.91, "elapsed_time": "16:30:40", "remaining_time": "7:26:58"} +{"current_steps": 4660, "total_steps": 6748, "loss": 1.3901, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0910588177472153e-05, "epoch": 1.38, "percentage": 69.06, "elapsed_time": "16:32:41", "remaining_time": "7:24:47"} +{"current_steps": 4670, "total_steps": 6748, "loss": 1.379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0814595828945154e-05, "epoch": 1.38, "percentage": 69.21, "elapsed_time": "16:34:53", "remaining_time": "7:22:41"} +{"current_steps": 4680, "total_steps": 6748, "loss": 1.3808, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0718910941766478e-05, "epoch": 1.39, "percentage": 69.35, "elapsed_time": "16:36:57", "remaining_time": "7:20:32"} +{"current_steps": 4690, "total_steps": 6748, "loss": 1.4105, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0623535589856887e-05, "epoch": 1.39, "percentage": 69.5, "elapsed_time": "16:39:02", "remaining_time": "7:18:23"} +{"current_steps": 4700, "total_steps": 6748, "loss": 1.3756, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0528471840428142e-05, "epoch": 1.39, "percentage": 69.65, "elapsed_time": "16:41:16", "remaining_time": "7:16:18"} +{"current_steps": 4710, "total_steps": 6748, "loss": 1.3708, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0433721753938182e-05, "epoch": 1.4, "percentage": 69.8, "elapsed_time": "16:43:24", "remaining_time": "7:14:10"} +{"current_steps": 4720, "total_steps": 6748, "loss": 1.3924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0339287384046462e-05, "epoch": 1.4, "percentage": 69.95, "elapsed_time": "16:45:31", "remaining_time": "7:12:02"} +{"current_steps": 4730, "total_steps": 6748, "loss": 1.3854, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.024517077756943e-05, "epoch": 1.4, "percentage": 70.09, "elapsed_time": "16:47:39", "remaining_time": "7:09:54"} +{"current_steps": 4740, "total_steps": 6748, "loss": 1.3908, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0151373974436184e-05, "epoch": 1.4, "percentage": 70.24, "elapsed_time": "16:49:42", "remaining_time": "7:07:44"} +{"current_steps": 4750, "total_steps": 6748, "loss": 1.3953, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0057899007644245e-05, "epoch": 1.41, "percentage": 70.39, "elapsed_time": "16:51:57", "remaining_time": "7:05:39"} +{"current_steps": 4760, "total_steps": 6748, "loss": 1.3933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.964747903215513e-06, "epoch": 1.41, "percentage": 70.54, "elapsed_time": "16:54:03", "remaining_time": "7:03:31"} +{"current_steps": 4770, "total_steps": 6748, "loss": 1.3854, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.871922680152318e-06, "epoch": 1.41, "percentage": 70.69, "elapsed_time": "16:56:08", "remaining_time": "7:01:22"} +{"current_steps": 4780, "total_steps": 6748, "loss": 1.4026, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.779425350393685e-06, "epoch": 1.42, "percentage": 70.84, "elapsed_time": "16:58:12", "remaining_time": "6:59:12"} +{"current_steps": 4790, "total_steps": 6748, "loss": 1.3958, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.687257918771719e-06, "epoch": 1.42, "percentage": 70.98, "elapsed_time": "17:00:20", "remaining_time": "6:57:04"} +{"current_steps": 4800, "total_steps": 6748, "loss": 1.3777, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.595422382968156e-06, "epoch": 1.42, "percentage": 71.13, "elapsed_time": "17:02:26", "remaining_time": "6:54:56"} +{"current_steps": 4810, "total_steps": 6748, "loss": 1.3835, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.503920733471052e-06, "epoch": 1.43, "percentage": 71.28, "elapsed_time": "17:04:28", "remaining_time": "6:52:46"} +{"current_steps": 4820, "total_steps": 6748, "loss": 1.3768, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.412754953531663e-06, "epoch": 1.43, "percentage": 71.43, "elapsed_time": "17:06:30", "remaining_time": "6:50:36"} +{"current_steps": 4830, "total_steps": 6748, "loss": 1.3846, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.321927019121435e-06, "epoch": 1.43, "percentage": 71.58, "elapsed_time": "17:08:42", "remaining_time": "6:48:29"} +{"current_steps": 4840, "total_steps": 6748, "loss": 1.3878, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.231438898889184e-06, "epoch": 1.43, "percentage": 71.72, "elapsed_time": "17:10:53", "remaining_time": "6:46:23"} +{"current_steps": 4850, "total_steps": 6748, "loss": 1.38, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.141292554118435e-06, "epoch": 1.44, "percentage": 71.87, "elapsed_time": "17:13:07", "remaining_time": "6:44:18"} +{"current_steps": 4860, "total_steps": 6748, "loss": 1.3841, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.051489938684903e-06, "epoch": 1.44, "percentage": 72.02, "elapsed_time": "17:15:17", "remaining_time": "6:42:11"} +{"current_steps": 4870, "total_steps": 6748, "loss": 1.4122, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.962032999014144e-06, "epoch": 1.44, "percentage": 72.17, "elapsed_time": "17:17:20", "remaining_time": "6:40:01"} +{"current_steps": 4880, "total_steps": 6748, "loss": 1.3839, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.87292367403937e-06, "epoch": 1.45, "percentage": 72.32, "elapsed_time": "17:19:30", "remaining_time": "6:37:54"} +{"current_steps": 4890, "total_steps": 6748, "loss": 1.3932, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.784163895159428e-06, "epoch": 1.45, "percentage": 72.47, "elapsed_time": "17:21:38", "remaining_time": "6:35:46"} +{"current_steps": 4900, "total_steps": 6748, "loss": 1.4012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.695755586196924e-06, "epoch": 1.45, "percentage": 72.61, "elapsed_time": "17:23:46", "remaining_time": "6:33:39"} +{"current_steps": 4910, "total_steps": 6748, "loss": 1.3931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.607700663356543e-06, "epoch": 1.45, "percentage": 72.76, "elapsed_time": "17:25:50", "remaining_time": "6:31:29"} +{"current_steps": 4920, "total_steps": 6748, "loss": 1.4003, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.520001035183503e-06, "epoch": 1.46, "percentage": 72.91, "elapsed_time": "17:28:00", "remaining_time": "6:29:22"} +{"current_steps": 4930, "total_steps": 6748, "loss": 1.4064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.432658602522193e-06, "epoch": 1.46, "percentage": 73.06, "elapsed_time": "17:30:09", "remaining_time": "6:27:15"} +{"current_steps": 4940, "total_steps": 6748, "loss": 1.383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.345675258474969e-06, "epoch": 1.46, "percentage": 73.21, "elapsed_time": "17:32:23", "remaining_time": "6:25:09"} +{"current_steps": 4950, "total_steps": 6748, "loss": 1.4147, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.259052888361132e-06, "epoch": 1.47, "percentage": 73.36, "elapsed_time": "17:34:23", "remaining_time": "6:22:59"} +{"current_steps": 4960, "total_steps": 6748, "loss": 1.4064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.172793369676052e-06, "epoch": 1.47, "percentage": 73.5, "elapsed_time": "17:36:27", "remaining_time": "6:20:50"} +{"current_steps": 4970, "total_steps": 6748, "loss": 1.3894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.086898572050494e-06, "epoch": 1.47, "percentage": 73.65, "elapsed_time": "17:38:34", "remaining_time": "6:18:41"} +{"current_steps": 4980, "total_steps": 6748, "loss": 1.3928, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.00137035721007e-06, "epoch": 1.48, "percentage": 73.8, "elapsed_time": "17:40:44", "remaining_time": "6:16:35"} +{"current_steps": 4990, "total_steps": 6748, "loss": 1.4049, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.916210578934896e-06, "epoch": 1.48, "percentage": 73.95, "elapsed_time": "17:42:57", "remaining_time": "6:14:28"} +{"current_steps": 5000, "total_steps": 6748, "loss": 1.402, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.831421083019422e-06, "epoch": 1.48, "percentage": 74.1, "elapsed_time": "17:45:07", "remaining_time": "6:12:22"} +{"current_steps": 5010, "total_steps": 6748, "loss": 1.4144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.747003707232415e-06, "epoch": 1.48, "percentage": 74.24, "elapsed_time": "17:47:13", "remaining_time": "6:10:13"} +{"current_steps": 5020, "total_steps": 6748, "loss": 1.3884, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.66296028127713e-06, "epoch": 1.49, "percentage": 74.39, "elapsed_time": "17:49:20", "remaining_time": "6:08:05"} +{"current_steps": 5030, "total_steps": 6748, "loss": 1.4116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.579292626751647e-06, "epoch": 1.49, "percentage": 74.54, "elapsed_time": "17:51:25", "remaining_time": "6:05:56"} +{"current_steps": 5040, "total_steps": 6748, "loss": 1.3828, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.4960025571094025e-06, "epoch": 1.49, "percentage": 74.69, "elapsed_time": "17:53:36", "remaining_time": "6:03:50"} +{"current_steps": 5050, "total_steps": 6748, "loss": 1.3821, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.413091877619868e-06, "epoch": 1.5, "percentage": 74.84, "elapsed_time": "17:55:39", "remaining_time": "6:01:40"} +{"current_steps": 5060, "total_steps": 6748, "loss": 1.4068, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.330562385329429e-06, "epoch": 1.5, "percentage": 74.99, "elapsed_time": "17:57:49", "remaining_time": "5:59:33"} +{"current_steps": 5070, "total_steps": 6748, "loss": 1.3842, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.248415869022434e-06, "epoch": 1.5, "percentage": 75.13, "elapsed_time": "17:59:56", "remaining_time": "5:57:25"} +{"current_steps": 5080, "total_steps": 6748, "loss": 1.3937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.16665410918243e-06, "epoch": 1.51, "percentage": 75.28, "elapsed_time": "18:02:02", "remaining_time": "5:55:16"} +{"current_steps": 5090, "total_steps": 6748, "loss": 1.4077, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.085278877953558e-06, "epoch": 1.51, "percentage": 75.43, "elapsed_time": "18:04:11", "remaining_time": "5:53:09"} +{"current_steps": 5100, "total_steps": 6748, "loss": 1.3989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.004291939102148e-06, "epoch": 1.51, "percentage": 75.58, "elapsed_time": "18:06:17", "remaining_time": "5:51:01"} +{"current_steps": 5110, "total_steps": 6748, "loss": 1.3727, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.923695047978502e-06, "epoch": 1.51, "percentage": 75.73, "elapsed_time": "18:08:26", "remaining_time": "5:48:53"} +{"current_steps": 5120, "total_steps": 6748, "loss": 1.3842, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.843489951478829e-06, "epoch": 1.52, "percentage": 75.87, "elapsed_time": "18:10:32", "remaining_time": "5:46:45"} +{"current_steps": 5130, "total_steps": 6748, "loss": 1.3662, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.763678388007394e-06, "epoch": 1.52, "percentage": 76.02, "elapsed_time": "18:12:44", "remaining_time": "5:44:39"} +{"current_steps": 5140, "total_steps": 6748, "loss": 1.4092, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.684262087438839e-06, "epoch": 1.52, "percentage": 76.17, "elapsed_time": "18:14:47", "remaining_time": "5:42:29"} +{"current_steps": 5150, "total_steps": 6748, "loss": 1.399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.605242771080686e-06, "epoch": 1.53, "percentage": 76.32, "elapsed_time": "18:16:53", "remaining_time": "5:40:21"} +{"current_steps": 5160, "total_steps": 6748, "loss": 1.3931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.526622151636011e-06, "epoch": 1.53, "percentage": 76.47, "elapsed_time": "18:19:03", "remaining_time": "5:38:14"} +{"current_steps": 5170, "total_steps": 6748, "loss": 1.3824, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.448401933166351e-06, "epoch": 1.53, "percentage": 76.62, "elapsed_time": "18:21:15", "remaining_time": "5:36:07"} +{"current_steps": 5180, "total_steps": 6748, "loss": 1.3764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.370583811054778e-06, "epoch": 1.53, "percentage": 76.76, "elapsed_time": "18:23:30", "remaining_time": "5:34:02"} +{"current_steps": 5190, "total_steps": 6748, "loss": 1.3835, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.293169471969104e-06, "epoch": 1.54, "percentage": 76.91, "elapsed_time": "18:25:39", "remaining_time": "5:31:54"} +{"current_steps": 5200, "total_steps": 6748, "loss": 1.382, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.216160593825363e-06, "epoch": 1.54, "percentage": 77.06, "elapsed_time": "18:27:50", "remaining_time": "5:29:47"} +{"current_steps": 5210, "total_steps": 6748, "loss": 1.3983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.1395588457514226e-06, "epoch": 1.54, "percentage": 77.21, "elapsed_time": "18:29:54", "remaining_time": "5:27:38"} +{"current_steps": 5220, "total_steps": 6748, "loss": 1.3709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.063365888050829e-06, "epoch": 1.55, "percentage": 77.36, "elapsed_time": "18:32:09", "remaining_time": "5:25:33"} +{"current_steps": 5230, "total_steps": 6748, "loss": 1.4037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.987583372166794e-06, "epoch": 1.55, "percentage": 77.5, "elapsed_time": "18:34:21", "remaining_time": "5:23:26"} +{"current_steps": 5240, "total_steps": 6748, "loss": 1.3955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.912212940646422e-06, "epoch": 1.55, "percentage": 77.65, "elapsed_time": "18:36:29", "remaining_time": "5:21:18"} +{"current_steps": 5250, "total_steps": 6748, "loss": 1.384, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.8372562271051e-06, "epoch": 1.56, "percentage": 77.8, "elapsed_time": "18:38:41", "remaining_time": "5:19:11"} +{"current_steps": 5260, "total_steps": 6748, "loss": 1.3772, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.762714856191087e-06, "epoch": 1.56, "percentage": 77.95, "elapsed_time": "18:40:48", "remaining_time": "5:17:04"} +{"current_steps": 5270, "total_steps": 6748, "loss": 1.3818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.688590443550304e-06, "epoch": 1.56, "percentage": 78.1, "elapsed_time": "18:42:55", "remaining_time": "5:14:55"} +{"current_steps": 5280, "total_steps": 6748, "loss": 1.4039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.61488459579132e-06, "epoch": 1.56, "percentage": 78.25, "elapsed_time": "18:45:04", "remaining_time": "5:12:48"} +{"current_steps": 5290, "total_steps": 6748, "loss": 1.3935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.541598910450518e-06, "epoch": 1.57, "percentage": 78.39, "elapsed_time": "18:47:12", "remaining_time": "5:10:40"} +{"current_steps": 5300, "total_steps": 6748, "loss": 1.402, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.4687349759574845e-06, "epoch": 1.57, "percentage": 78.54, "elapsed_time": "18:49:19", "remaining_time": "5:08:32"} +{"current_steps": 5310, "total_steps": 6748, "loss": 1.3774, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.396294371600569e-06, "epoch": 1.57, "percentage": 78.69, "elapsed_time": "18:51:32", "remaining_time": "5:06:25"} +{"current_steps": 5320, "total_steps": 6748, "loss": 1.3936, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.3242786674926545e-06, "epoch": 1.58, "percentage": 78.84, "elapsed_time": "18:53:33", "remaining_time": "5:04:16"} +{"current_steps": 5330, "total_steps": 6748, "loss": 1.3914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.252689424537139e-06, "epoch": 1.58, "percentage": 78.99, "elapsed_time": "18:55:41", "remaining_time": "5:02:08"} +{"current_steps": 5340, "total_steps": 6748, "loss": 1.3931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.181528194394081e-06, "epoch": 1.58, "percentage": 79.13, "elapsed_time": "18:57:50", "remaining_time": "5:00:00"} +{"current_steps": 5350, "total_steps": 6748, "loss": 1.3854, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.11079651944659e-06, "epoch": 1.59, "percentage": 79.28, "elapsed_time": "18:59:56", "remaining_time": "4:57:52"} +{"current_steps": 5360, "total_steps": 6748, "loss": 1.4101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.040495932767386e-06, "epoch": 1.59, "percentage": 79.43, "elapsed_time": "19:02:01", "remaining_time": "4:55:44"} +{"current_steps": 5370, "total_steps": 6748, "loss": 1.3929, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.970627958085574e-06, "epoch": 1.59, "percentage": 79.58, "elapsed_time": "19:04:13", "remaining_time": "4:53:37"} +{"current_steps": 5380, "total_steps": 6748, "loss": 1.3826, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.901194109753607e-06, "epoch": 1.59, "percentage": 79.73, "elapsed_time": "19:06:11", "remaining_time": "4:51:26"} +{"current_steps": 5390, "total_steps": 6748, "loss": 1.3974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.832195892714489e-06, "epoch": 1.6, "percentage": 79.88, "elapsed_time": "19:08:23", "remaining_time": "4:49:20"} +{"current_steps": 5400, "total_steps": 6748, "loss": 1.3988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.763634802469124e-06, "epoch": 1.6, "percentage": 80.02, "elapsed_time": "19:10:30", "remaining_time": "4:47:11"} +{"current_steps": 5410, "total_steps": 6748, "loss": 1.3875, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6955123250439245e-06, "epoch": 1.6, "percentage": 80.17, "elapsed_time": "19:12:41", "remaining_time": "4:45:05"} +{"current_steps": 5420, "total_steps": 6748, "loss": 1.3881, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6278299369585916e-06, "epoch": 1.61, "percentage": 80.32, "elapsed_time": "19:14:54", "remaining_time": "4:42:58"} +{"current_steps": 5430, "total_steps": 6748, "loss": 1.3924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.560589105194121e-06, "epoch": 1.61, "percentage": 80.47, "elapsed_time": "19:17:09", "remaining_time": "4:40:52"} +{"current_steps": 5440, "total_steps": 6748, "loss": 1.4058, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.493791287160998e-06, "epoch": 1.61, "percentage": 80.62, "elapsed_time": "19:19:15", "remaining_time": "4:38:43"} +{"current_steps": 5450, "total_steps": 6748, "loss": 1.3946, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4274379306676164e-06, "epoch": 1.61, "percentage": 80.76, "elapsed_time": "19:21:20", "remaining_time": "4:36:35"} +{"current_steps": 5460, "total_steps": 6748, "loss": 1.4045, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.361530473888889e-06, "epoch": 1.62, "percentage": 80.91, "elapsed_time": "19:23:26", "remaining_time": "4:34:27"} +{"current_steps": 5470, "total_steps": 6748, "loss": 1.3817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.296070345335085e-06, "epoch": 1.62, "percentage": 81.06, "elapsed_time": "19:25:34", "remaining_time": "4:32:19"} +{"current_steps": 5480, "total_steps": 6748, "loss": 1.3989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.231058963820867e-06, "epoch": 1.62, "percentage": 81.21, "elapsed_time": "19:27:40", "remaining_time": "4:30:11"} +{"current_steps": 5490, "total_steps": 6748, "loss": 1.4004, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.166497738434527e-06, "epoch": 1.63, "percentage": 81.36, "elapsed_time": "19:29:50", "remaining_time": "4:28:03"} +{"current_steps": 5500, "total_steps": 6748, "loss": 1.3905, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.102388068507465e-06, "epoch": 1.63, "percentage": 81.51, "elapsed_time": "19:32:00", "remaining_time": "4:25:56"} +{"current_steps": 5510, "total_steps": 6748, "loss": 1.3731, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.03873134358384e-06, "epoch": 1.63, "percentage": 81.65, "elapsed_time": "19:34:09", "remaining_time": "4:23:48"} +{"current_steps": 5520, "total_steps": 6748, "loss": 1.383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9755289433904694e-06, "epoch": 1.64, "percentage": 81.8, "elapsed_time": "19:36:26", "remaining_time": "4:21:43"} +{"current_steps": 5530, "total_steps": 6748, "loss": 1.3853, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.912782237806903e-06, "epoch": 1.64, "percentage": 81.95, "elapsed_time": "19:38:38", "remaining_time": "4:19:36"} +{"current_steps": 5540, "total_steps": 6748, "loss": 1.4039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.850492586835755e-06, "epoch": 1.64, "percentage": 82.1, "elapsed_time": "19:40:51", "remaining_time": "4:17:29"} +{"current_steps": 5550, "total_steps": 6748, "loss": 1.3703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.788661340573213e-06, "epoch": 1.64, "percentage": 82.25, "elapsed_time": "19:42:58", "remaining_time": "4:15:21"} +{"current_steps": 5560, "total_steps": 6748, "loss": 1.388, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7272898391797734e-06, "epoch": 1.65, "percentage": 82.39, "elapsed_time": "19:45:03", "remaining_time": "4:13:12"} +{"current_steps": 5570, "total_steps": 6748, "loss": 1.3785, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6663794128512038e-06, "epoch": 1.65, "percentage": 82.54, "elapsed_time": "19:47:09", "remaining_time": "4:11:04"} +{"current_steps": 5580, "total_steps": 6748, "loss": 1.3901, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6059313817897065e-06, "epoch": 1.65, "percentage": 82.69, "elapsed_time": "19:49:17", "remaining_time": "4:08:56"} +{"current_steps": 5590, "total_steps": 6748, "loss": 1.3894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5459470561753e-06, "epoch": 1.66, "percentage": 82.84, "elapsed_time": "19:51:23", "remaining_time": "4:06:48"} +{"current_steps": 5600, "total_steps": 6748, "loss": 1.3799, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4864277361374264e-06, "epoch": 1.66, "percentage": 82.99, "elapsed_time": "19:53:33", "remaining_time": "4:04:40"} +{"current_steps": 5610, "total_steps": 6748, "loss": 1.3798, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4273747117267774e-06, "epoch": 1.66, "percentage": 83.14, "elapsed_time": "19:55:42", "remaining_time": "4:02:33"} +{"current_steps": 5620, "total_steps": 6748, "loss": 1.3948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3687892628873175e-06, "epoch": 1.67, "percentage": 83.28, "elapsed_time": "19:57:51", "remaining_time": "4:00:25"} +{"current_steps": 5630, "total_steps": 6748, "loss": 1.4071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.310672659428557e-06, "epoch": 1.67, "percentage": 83.43, "elapsed_time": "20:00:00", "remaining_time": "3:58:17"} +{"current_steps": 5640, "total_steps": 6748, "loss": 1.3993, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2530261609980183e-06, "epoch": 1.67, "percentage": 83.58, "elapsed_time": "20:02:02", "remaining_time": "3:56:08"} +{"current_steps": 5650, "total_steps": 6748, "loss": 1.3885, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.195851017053944e-06, "epoch": 1.67, "percentage": 83.73, "elapsed_time": "20:04:07", "remaining_time": "3:54:00"} +{"current_steps": 5660, "total_steps": 6748, "loss": 1.3919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1391484668382073e-06, "epoch": 1.68, "percentage": 83.88, "elapsed_time": "20:06:16", "remaining_time": "3:51:52"} +{"current_steps": 5670, "total_steps": 6748, "loss": 1.3965, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0829197393494548e-06, "epoch": 1.68, "percentage": 84.02, "elapsed_time": "20:08:25", "remaining_time": "3:49:45"} +{"current_steps": 5680, "total_steps": 6748, "loss": 1.4135, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0271660533164714e-06, "epoch": 1.68, "percentage": 84.17, "elapsed_time": "20:10:31", "remaining_time": "3:47:36"} +{"current_steps": 5690, "total_steps": 6748, "loss": 1.3923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9718886171717613e-06, "epoch": 1.69, "percentage": 84.32, "elapsed_time": "20:12:41", "remaining_time": "3:45:29"} +{"current_steps": 5700, "total_steps": 6748, "loss": 1.3663, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9170886290253552e-06, "epoch": 1.69, "percentage": 84.47, "elapsed_time": "20:14:45", "remaining_time": "3:43:20"} +{"current_steps": 5710, "total_steps": 6748, "loss": 1.3772, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8627672766388448e-06, "epoch": 1.69, "percentage": 84.62, "elapsed_time": "20:16:51", "remaining_time": "3:41:12"} +{"current_steps": 5720, "total_steps": 6748, "loss": 1.373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8089257373996424e-06, "epoch": 1.69, "percentage": 84.77, "elapsed_time": "20:18:59", "remaining_time": "3:39:04"} +{"current_steps": 5730, "total_steps": 6748, "loss": 1.3858, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.755565178295447e-06, "epoch": 1.7, "percentage": 84.91, "elapsed_time": "20:21:09", "remaining_time": "3:36:57"} +{"current_steps": 5740, "total_steps": 6748, "loss": 1.3996, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7026867558889694e-06, "epoch": 1.7, "percentage": 85.06, "elapsed_time": "20:23:19", "remaining_time": "3:34:49"} +{"current_steps": 5750, "total_steps": 6748, "loss": 1.4194, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6502916162928463e-06, "epoch": 1.7, "percentage": 85.21, "elapsed_time": "20:25:25", "remaining_time": "3:32:41"} +{"current_steps": 5760, "total_steps": 6748, "loss": 1.3895, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5983808951448196e-06, "epoch": 1.71, "percentage": 85.36, "elapsed_time": "20:27:36", "remaining_time": "3:30:34"} +{"current_steps": 5770, "total_steps": 6748, "loss": 1.376, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5469557175830993e-06, "epoch": 1.71, "percentage": 85.51, "elapsed_time": "20:29:47", "remaining_time": "3:28:26"} +{"current_steps": 5780, "total_steps": 6748, "loss": 1.3901, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.496017198221995e-06, "epoch": 1.71, "percentage": 85.66, "elapsed_time": "20:31:51", "remaining_time": "3:26:18"} +{"current_steps": 5790, "total_steps": 6748, "loss": 1.3782, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.445566441127742e-06, "epoch": 1.72, "percentage": 85.8, "elapsed_time": "20:34:00", "remaining_time": "3:24:10"} +{"current_steps": 5800, "total_steps": 6748, "loss": 1.3723, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3956045397945826e-06, "epoch": 1.72, "percentage": 85.95, "elapsed_time": "20:36:09", "remaining_time": "3:22:02"} +{"current_steps": 5810, "total_steps": 6748, "loss": 1.3705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3461325771210683e-06, "epoch": 1.72, "percentage": 86.1, "elapsed_time": "20:38:13", "remaining_time": "3:19:54"} +{"current_steps": 5820, "total_steps": 6748, "loss": 1.4018, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.297151625386576e-06, "epoch": 1.72, "percentage": 86.25, "elapsed_time": "20:40:25", "remaining_time": "3:17:47"} +{"current_steps": 5830, "total_steps": 6748, "loss": 1.3811, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2486627462280724e-06, "epoch": 1.73, "percentage": 86.4, "elapsed_time": "20:42:31", "remaining_time": "3:15:38"} +{"current_steps": 5840, "total_steps": 6748, "loss": 1.3894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.200666990617098e-06, "epoch": 1.73, "percentage": 86.54, "elapsed_time": "20:44:39", "remaining_time": "3:13:31"} +{"current_steps": 5850, "total_steps": 6748, "loss": 1.3931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.153165398837009e-06, "epoch": 1.73, "percentage": 86.69, "elapsed_time": "20:46:45", "remaining_time": "3:11:22"} +{"current_steps": 5860, "total_steps": 6748, "loss": 1.3775, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1061590004603978e-06, "epoch": 1.74, "percentage": 86.84, "elapsed_time": "20:48:46", "remaining_time": "3:09:14"} +{"current_steps": 5870, "total_steps": 6748, "loss": 1.3897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.059648814326806e-06, "epoch": 1.74, "percentage": 86.99, "elapsed_time": "20:50:55", "remaining_time": "3:07:06"} +{"current_steps": 5880, "total_steps": 6748, "loss": 1.3919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.013635848520626e-06, "epoch": 1.74, "percentage": 87.14, "elapsed_time": "20:53:02", "remaining_time": "3:04:58"} +{"current_steps": 5890, "total_steps": 6748, "loss": 1.4191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9681211003492543e-06, "epoch": 1.75, "percentage": 87.29, "elapsed_time": "20:55:12", "remaining_time": "3:02:50"} +{"current_steps": 5900, "total_steps": 6748, "loss": 1.4027, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.923105556321475e-06, "epoch": 1.75, "percentage": 87.43, "elapsed_time": "20:57:26", "remaining_time": "3:00:43"} +{"current_steps": 5910, "total_steps": 6748, "loss": 1.3767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8785901921260784e-06, "epoch": 1.75, "percentage": 87.58, "elapsed_time": "20:59:39", "remaining_time": "2:58:36"} +{"current_steps": 5920, "total_steps": 6748, "loss": 1.3801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8345759726107193e-06, "epoch": 1.75, "percentage": 87.73, "elapsed_time": "21:01:46", "remaining_time": "2:56:28"} +{"current_steps": 5930, "total_steps": 6748, "loss": 1.3881, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7910638517609962e-06, "epoch": 1.76, "percentage": 87.88, "elapsed_time": "21:03:55", "remaining_time": "2:54:20"} +{"current_steps": 5940, "total_steps": 6748, "loss": 1.3915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.748054772679772e-06, "epoch": 1.76, "percentage": 88.03, "elapsed_time": "21:06:03", "remaining_time": "2:52:13"} +{"current_steps": 5950, "total_steps": 6748, "loss": 1.403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.705549667566747e-06, "epoch": 1.76, "percentage": 88.17, "elapsed_time": "21:08:10", "remaining_time": "2:50:05"} +{"current_steps": 5960, "total_steps": 6748, "loss": 1.3882, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6635494576982353e-06, "epoch": 1.77, "percentage": 88.32, "elapsed_time": "21:10:17", "remaining_time": "2:47:57"} +{"current_steps": 5970, "total_steps": 6748, "loss": 1.3902, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6220550534072094e-06, "epoch": 1.77, "percentage": 88.47, "elapsed_time": "21:12:29", "remaining_time": "2:45:49"} +{"current_steps": 5980, "total_steps": 6748, "loss": 1.3928, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5810673540635702e-06, "epoch": 1.77, "percentage": 88.62, "elapsed_time": "21:14:32", "remaining_time": "2:43:41"} +{"current_steps": 5990, "total_steps": 6748, "loss": 1.3819, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.540587248054645e-06, "epoch": 1.77, "percentage": 88.77, "elapsed_time": "21:16:43", "remaining_time": "2:41:33"} +{"current_steps": 6000, "total_steps": 6748, "loss": 1.3948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5006156127659348e-06, "epoch": 1.78, "percentage": 88.92, "elapsed_time": "21:18:53", "remaining_time": "2:39:26"} +{"current_steps": 6010, "total_steps": 6748, "loss": 1.3914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4611533145621026e-06, "epoch": 1.78, "percentage": 89.06, "elapsed_time": "21:21:05", "remaining_time": "2:37:18"} +{"current_steps": 6020, "total_steps": 6748, "loss": 1.3713, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.422201208768187e-06, "epoch": 1.78, "percentage": 89.21, "elapsed_time": "21:23:11", "remaining_time": "2:35:10"} +{"current_steps": 6030, "total_steps": 6748, "loss": 1.3922, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3837601396510745e-06, "epoch": 1.79, "percentage": 89.36, "elapsed_time": "21:25:15", "remaining_time": "2:33:02"} +{"current_steps": 6040, "total_steps": 6748, "loss": 1.3775, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.345830940401191e-06, "epoch": 1.79, "percentage": 89.51, "elapsed_time": "21:27:26", "remaining_time": "2:30:54"} +{"current_steps": 6050, "total_steps": 6748, "loss": 1.3974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3084144331144439e-06, "epoch": 1.79, "percentage": 89.66, "elapsed_time": "21:29:31", "remaining_time": "2:28:46"} +{"current_steps": 6060, "total_steps": 6748, "loss": 1.3859, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2715114287744079e-06, "epoch": 1.8, "percentage": 89.8, "elapsed_time": "21:31:38", "remaining_time": "2:26:38"} +{"current_steps": 6070, "total_steps": 6748, "loss": 1.3795, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2351227272347444e-06, "epoch": 1.8, "percentage": 89.95, "elapsed_time": "21:33:44", "remaining_time": "2:24:30"} +{"current_steps": 6080, "total_steps": 6748, "loss": 1.376, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.19924911720187e-06, "epoch": 1.8, "percentage": 90.1, "elapsed_time": "21:35:51", "remaining_time": "2:22:22"} +{"current_steps": 6090, "total_steps": 6748, "loss": 1.3892, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1638913762178489e-06, "epoch": 1.8, "percentage": 90.25, "elapsed_time": "21:37:59", "remaining_time": "2:20:14"} +{"current_steps": 6100, "total_steps": 6748, "loss": 1.3866, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1290502706435584e-06, "epoch": 1.81, "percentage": 90.4, "elapsed_time": "21:40:05", "remaining_time": "2:18:06"} +{"current_steps": 6110, "total_steps": 6748, "loss": 1.3911, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0947265556420588e-06, "epoch": 1.81, "percentage": 90.55, "elapsed_time": "21:42:14", "remaining_time": "2:15:58"} +{"current_steps": 6120, "total_steps": 6748, "loss": 1.3535, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.060920975162244e-06, "epoch": 1.81, "percentage": 90.69, "elapsed_time": "21:44:18", "remaining_time": "2:13:50"} +{"current_steps": 6130, "total_steps": 6748, "loss": 1.3815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0276342619227024e-06, "epoch": 1.82, "percentage": 90.84, "elapsed_time": "21:46:24", "remaining_time": "2:11:42"} +{"current_steps": 6140, "total_steps": 6748, "loss": 1.3989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.948671373958468e-07, "epoch": 1.82, "percentage": 90.99, "elapsed_time": "21:48:37", "remaining_time": "2:09:35"} +{"current_steps": 6150, "total_steps": 6748, "loss": 1.3974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.626203117922672e-07, "epoch": 1.82, "percentage": 91.14, "elapsed_time": "21:50:43", "remaining_time": "2:07:26"} +{"current_steps": 6160, "total_steps": 6748, "loss": 1.3717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.308944840453415e-07, "epoch": 1.83, "percentage": 91.29, "elapsed_time": "21:53:01", "remaining_time": "2:05:20"} +{"current_steps": 6170, "total_steps": 6748, "loss": 1.3797, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.996903417960917e-07, "epoch": 1.83, "percentage": 91.43, "elapsed_time": "21:55:07", "remaining_time": "2:03:12"} +{"current_steps": 6180, "total_steps": 6748, "loss": 1.3846, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.690085613782706e-07, "epoch": 1.83, "percentage": 91.58, "elapsed_time": "21:57:18", "remaining_time": "2:01:04"} +{"current_steps": 6190, "total_steps": 6748, "loss": 1.379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.388498078037044e-07, "epoch": 1.83, "percentage": 91.73, "elapsed_time": "21:59:25", "remaining_time": "1:58:56"} +{"current_steps": 6200, "total_steps": 6748, "loss": 1.408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.092147347478873e-07, "epoch": 1.84, "percentage": 91.88, "elapsed_time": "22:01:28", "remaining_time": "1:56:48"} +{"current_steps": 6210, "total_steps": 6748, "loss": 1.3776, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.801039845358044e-07, "epoch": 1.84, "percentage": 92.03, "elapsed_time": "22:03:32", "remaining_time": "1:54:39"} +{"current_steps": 6220, "total_steps": 6748, "loss": 1.397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.515181881280115e-07, "epoch": 1.84, "percentage": 92.18, "elapsed_time": "22:05:40", "remaining_time": "1:52:31"} +{"current_steps": 6230, "total_steps": 6748, "loss": 1.3768, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.234579651069578e-07, "epoch": 1.85, "percentage": 92.32, "elapsed_time": "22:07:48", "remaining_time": "1:50:24"} +{"current_steps": 6240, "total_steps": 6748, "loss": 1.3807, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.959239236635662e-07, "epoch": 1.85, "percentage": 92.47, "elapsed_time": "22:09:56", "remaining_time": "1:48:16"} +{"current_steps": 6250, "total_steps": 6748, "loss": 1.3947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.68916660584043e-07, "epoch": 1.85, "percentage": 92.62, "elapsed_time": "22:12:00", "remaining_time": "1:46:08"} +{"current_steps": 6260, "total_steps": 6748, "loss": 1.3785, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.424367612369364e-07, "epoch": 1.85, "percentage": 92.77, "elapsed_time": "22:14:05", "remaining_time": "1:44:00"} +{"current_steps": 6270, "total_steps": 6748, "loss": 1.3828, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.164847995604656e-07, "epoch": 1.86, "percentage": 92.92, "elapsed_time": "22:16:15", "remaining_time": "1:41:52"} +{"current_steps": 6280, "total_steps": 6748, "loss": 1.3765, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.910613380500696e-07, "epoch": 1.86, "percentage": 93.06, "elapsed_time": "22:18:28", "remaining_time": "1:39:44"} +{"current_steps": 6290, "total_steps": 6748, "loss": 1.3994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.66166927746209e-07, "epoch": 1.86, "percentage": 93.21, "elapsed_time": "22:20:32", "remaining_time": "1:37:36"} +{"current_steps": 6300, "total_steps": 6748, "loss": 1.4081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.418021082224472e-07, "epoch": 1.87, "percentage": 93.36, "elapsed_time": "22:22:39", "remaining_time": "1:35:28"} +{"current_steps": 6310, "total_steps": 6748, "loss": 1.3885, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.179674075737273e-07, "epoch": 1.87, "percentage": 93.51, "elapsed_time": "22:24:51", "remaining_time": "1:33:21"} +{"current_steps": 6320, "total_steps": 6748, "loss": 1.3748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.946633424049413e-07, "epoch": 1.87, "percentage": 93.66, "elapsed_time": "22:27:03", "remaining_time": "1:31:13"} +{"current_steps": 6330, "total_steps": 6748, "loss": 1.3988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7189041781973144e-07, "epoch": 1.88, "percentage": 93.81, "elapsed_time": "22:29:14", "remaining_time": "1:29:05"} +{"current_steps": 6340, "total_steps": 6748, "loss": 1.3815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4964912740954045e-07, "epoch": 1.88, "percentage": 93.95, "elapsed_time": "22:31:24", "remaining_time": "1:26:58"} +{"current_steps": 6350, "total_steps": 6748, "loss": 1.3736, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2793995324290903e-07, "epoch": 1.88, "percentage": 94.1, "elapsed_time": "22:33:35", "remaining_time": "1:24:50"} +{"current_steps": 6360, "total_steps": 6748, "loss": 1.3847, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.067633658550396e-07, "epoch": 1.88, "percentage": 94.25, "elapsed_time": "22:35:52", "remaining_time": "1:22:42"} +{"current_steps": 6370, "total_steps": 6748, "loss": 1.3907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.861198242375852e-07, "epoch": 1.89, "percentage": 94.4, "elapsed_time": "22:38:00", "remaining_time": "1:20:35"} +{"current_steps": 6380, "total_steps": 6748, "loss": 1.3914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.660097758287018e-07, "epoch": 1.89, "percentage": 94.55, "elapsed_time": "22:40:09", "remaining_time": "1:18:27"} +{"current_steps": 6390, "total_steps": 6748, "loss": 1.3861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.464336565033588e-07, "epoch": 1.89, "percentage": 94.69, "elapsed_time": "22:42:20", "remaining_time": "1:16:19"} +{"current_steps": 6400, "total_steps": 6748, "loss": 1.3937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.273918905638912e-07, "epoch": 1.9, "percentage": 94.84, "elapsed_time": "22:44:26", "remaining_time": "1:14:11"} +{"current_steps": 6410, "total_steps": 6748, "loss": 1.3783, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.088848907307873e-07, "epoch": 1.9, "percentage": 94.99, "elapsed_time": "22:46:36", "remaining_time": "1:12:03"} +{"current_steps": 6420, "total_steps": 6748, "loss": 1.4057, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.909130581337655e-07, "epoch": 1.9, "percentage": 95.14, "elapsed_time": "22:48:44", "remaining_time": "1:09:55"} +{"current_steps": 6430, "total_steps": 6748, "loss": 1.3672, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7347678230306427e-07, "epoch": 1.91, "percentage": 95.29, "elapsed_time": "22:50:51", "remaining_time": "1:07:47"} +{"current_steps": 6440, "total_steps": 6748, "loss": 1.3742, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5657644116100497e-07, "epoch": 1.91, "percentage": 95.44, "elapsed_time": "22:53:00", "remaining_time": "1:05:39"} +{"current_steps": 6450, "total_steps": 6748, "loss": 1.3964, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4021240101379793e-07, "epoch": 1.91, "percentage": 95.58, "elapsed_time": "22:55:06", "remaining_time": "1:03:31"} +{"current_steps": 6460, "total_steps": 6748, "loss": 1.4025, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.243850165436129e-07, "epoch": 1.91, "percentage": 95.73, "elapsed_time": "22:57:16", "remaining_time": "1:01:24"} +{"current_steps": 6470, "total_steps": 6748, "loss": 1.386, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0909463080087123e-07, "epoch": 1.92, "percentage": 95.88, "elapsed_time": "22:59:23", "remaining_time": "0:59:16"} +{"current_steps": 6480, "total_steps": 6748, "loss": 1.3875, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9434157519683248e-07, "epoch": 1.92, "percentage": 96.03, "elapsed_time": "23:01:35", "remaining_time": "0:57:08"} +{"current_steps": 6490, "total_steps": 6748, "loss": 1.3945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8012616949640015e-07, "epoch": 1.92, "percentage": 96.18, "elapsed_time": "23:03:42", "remaining_time": "0:55:00"} +{"current_steps": 6500, "total_steps": 6748, "loss": 1.3725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6644872181118543e-07, "epoch": 1.93, "percentage": 96.32, "elapsed_time": "23:05:49", "remaining_time": "0:52:52"} +{"current_steps": 6510, "total_steps": 6748, "loss": 1.3732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.533095285928432e-07, "epoch": 1.93, "percentage": 96.47, "elapsed_time": "23:07:54", "remaining_time": "0:50:44"} +{"current_steps": 6520, "total_steps": 6748, "loss": 1.3791, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4070887462663552e-07, "epoch": 1.93, "percentage": 96.62, "elapsed_time": "23:10:00", "remaining_time": "0:48:36"} +{"current_steps": 6530, "total_steps": 6748, "loss": 1.3922, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2864703302526703e-07, "epoch": 1.93, "percentage": 96.77, "elapsed_time": "23:12:09", "remaining_time": "0:46:28"} +{"current_steps": 6540, "total_steps": 6748, "loss": 1.3737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.171242652229565e-07, "epoch": 1.94, "percentage": 96.92, "elapsed_time": "23:14:17", "remaining_time": "0:44:20"} +{"current_steps": 6550, "total_steps": 6748, "loss": 1.3879, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0614082096977729e-07, "epoch": 1.94, "percentage": 97.07, "elapsed_time": "23:16:26", "remaining_time": "0:42:12"} +{"current_steps": 6560, "total_steps": 6748, "loss": 1.3962, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.569693832623961e-08, "epoch": 1.94, "percentage": 97.21, "elapsed_time": "23:18:32", "remaining_time": "0:40:04"} +{"current_steps": 6570, "total_steps": 6748, "loss": 1.4029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.579284365813623e-08, "epoch": 1.95, "percentage": 97.36, "elapsed_time": "23:20:39", "remaining_time": "0:37:56"} +{"current_steps": 6580, "total_steps": 6748, "loss": 1.3903, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.642875163162977e-08, "epoch": 1.95, "percentage": 97.51, "elapsed_time": "23:22:48", "remaining_time": "0:35:48"} +{"current_steps": 6590, "total_steps": 6748, "loss": 1.3986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.760486520860643e-08, "epoch": 1.95, "percentage": 97.66, "elapsed_time": "23:24:50", "remaining_time": "0:33:40"} +{"current_steps": 6600, "total_steps": 6748, "loss": 1.3624, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.932137564227391e-08, "epoch": 1.96, "percentage": 97.81, "elapsed_time": "23:26:54", "remaining_time": "0:31:32"} +{"current_steps": 6610, "total_steps": 6748, "loss": 1.3732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.157846247300646e-08, "epoch": 1.96, "percentage": 97.95, "elapsed_time": "23:29:05", "remaining_time": "0:29:25"} +{"current_steps": 6620, "total_steps": 6748, "loss": 1.3838, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.43762935244757e-08, "epoch": 1.96, "percentage": 98.1, "elapsed_time": "23:31:15", "remaining_time": "0:27:17"} +{"current_steps": 6630, "total_steps": 6748, "loss": 1.3711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7715024899989684e-08, "epoch": 1.96, "percentage": 98.25, "elapsed_time": "23:33:23", "remaining_time": "0:25:09"} +{"current_steps": 6640, "total_steps": 6748, "loss": 1.3974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.159480097912615e-08, "epoch": 1.97, "percentage": 98.4, "elapsed_time": "23:35:35", "remaining_time": "0:23:01"} +{"current_steps": 6650, "total_steps": 6748, "loss": 1.3918, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6015754414593363e-08, "epoch": 1.97, "percentage": 98.55, "elapsed_time": "23:37:45", "remaining_time": "0:20:53"} +{"current_steps": 6660, "total_steps": 6748, "loss": 1.3933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.097800612936296e-08, "epoch": 1.97, "percentage": 98.7, "elapsed_time": "23:39:51", "remaining_time": "0:18:45"} +{"current_steps": 6670, "total_steps": 6748, "loss": 1.3815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6481665314035944e-08, "epoch": 1.98, "percentage": 98.84, "elapsed_time": "23:41:55", "remaining_time": "0:16:37"} +{"current_steps": 6680, "total_steps": 6748, "loss": 1.3671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2526829424494569e-08, "epoch": 1.98, "percentage": 98.99, "elapsed_time": "23:44:04", "remaining_time": "0:14:29"} +{"current_steps": 6690, "total_steps": 6748, "loss": 1.4008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.113584179770707e-09, "epoch": 1.98, "percentage": 99.14, "elapsed_time": "23:46:10", "remaining_time": "0:12:21"} +{"current_steps": 6700, "total_steps": 6748, "loss": 1.3876, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.242003560205656e-09, "epoch": 1.99, "percentage": 99.29, "elapsed_time": "23:48:11", "remaining_time": "0:10:13"} +{"current_steps": 6710, "total_steps": 6748, "loss": 1.3722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9121498058292124e-09, "epoch": 1.99, "percentage": 99.44, "elapsed_time": "23:50:20", "remaining_time": "0:08:06"} +{"current_steps": 6720, "total_steps": 6748, "loss": 1.3823, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.124073415030181e-09, "epoch": 1.99, "percentage": 99.59, "elapsed_time": "23:52:30", "remaining_time": "0:05:58"} +{"current_steps": 6730, "total_steps": 6748, "loss": 1.3815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.778131434433734e-10, "epoch": 1.99, "percentage": 99.73, "elapsed_time": "23:54:35", "remaining_time": "0:03:50"} +{"current_steps": 6740, "total_steps": 6748, "loss": 1.3889, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7339600311971903e-10, "epoch": 2.0, "percentage": 99.88, "elapsed_time": "23:56:42", "remaining_time": "0:01:42"} +{"current_steps": 6748, "total_steps": 6748, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "23:58:23", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..b6f0f4c17f093b02a059ff244283452c080044b5 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,4069 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.999555522631306, + "global_step": 6748, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.9999729068921297e-05, + "loss": 1.8898, + "step": 10 + }, + { + "epoch": 0.01, + "learning_rate": 4.9998916281557476e-05, + "loss": 1.7273, + "step": 20 + }, + { + "epoch": 0.01, + "learning_rate": 4.999756165552527e-05, + "loss": 1.6799, + "step": 30 + }, + { + "epoch": 0.01, + "learning_rate": 4.999566522018553e-05, + "loss": 1.6431, + "step": 40 + }, + { + "epoch": 0.01, + "learning_rate": 4.999322701664249e-05, + "loss": 1.6153, + "step": 50 + }, + { + "epoch": 0.02, + "learning_rate": 4.9990247097742984e-05, + "loss": 1.5933, + "step": 60 + }, + { + "epoch": 0.02, + "learning_rate": 4.9986725528075205e-05, + "loss": 1.5913, + "step": 70 + }, + { + "epoch": 0.02, + "learning_rate": 4.998266238396737e-05, + "loss": 1.5434, + "step": 80 + }, + { + "epoch": 0.03, + "learning_rate": 4.997805775348605e-05, + "loss": 1.5304, + "step": 90 + }, + { + "epoch": 0.03, + "learning_rate": 4.997291173643424e-05, + "loss": 1.5531, + "step": 100 + }, + { + "epoch": 0.03, + "learning_rate": 4.996722444434921e-05, + "loss": 1.5446, + "step": 110 + }, + { + "epoch": 0.04, + "learning_rate": 4.99609960005001e-05, + "loss": 1.5352, + "step": 120 + }, + { + "epoch": 0.04, + "learning_rate": 4.995422653988524e-05, + "loss": 1.5303, + "step": 130 + }, + { + "epoch": 0.04, + "learning_rate": 4.994691620922919e-05, + "loss": 1.5449, + "step": 140 + }, + { + "epoch": 0.04, + "learning_rate": 4.993906516697964e-05, + "loss": 1.5114, + "step": 150 + }, + { + "epoch": 0.05, + "learning_rate": 4.9930673583303865e-05, + "loss": 1.5043, + "step": 160 + }, + { + "epoch": 0.05, + "learning_rate": 4.992174164008515e-05, + "loss": 1.5476, + "step": 170 + }, + { + "epoch": 0.05, + "learning_rate": 4.991226953091877e-05, + "loss": 1.5107, + "step": 180 + }, + { + "epoch": 0.06, + "learning_rate": 4.9902257461107824e-05, + "loss": 1.5104, + "step": 190 + }, + { + "epoch": 0.06, + "learning_rate": 4.9891705647658795e-05, + "loss": 1.5298, + "step": 200 + }, + { + "epoch": 0.06, + "learning_rate": 4.988061431927681e-05, + "loss": 1.4907, + "step": 210 + }, + { + "epoch": 0.07, + "learning_rate": 4.986898371636071e-05, + "loss": 1.5127, + "step": 220 + }, + { + "epoch": 0.07, + "learning_rate": 4.985681409099784e-05, + "loss": 1.5037, + "step": 230 + }, + { + "epoch": 0.07, + "learning_rate": 4.984410570695858e-05, + "loss": 1.5029, + "step": 240 + }, + { + "epoch": 0.07, + "learning_rate": 4.983085883969063e-05, + "loss": 1.4725, + "step": 250 + }, + { + "epoch": 0.08, + "learning_rate": 4.981707377631303e-05, + "loss": 1.5148, + "step": 260 + }, + { + "epoch": 0.08, + "learning_rate": 4.9802750815609936e-05, + "loss": 1.4993, + "step": 270 + }, + { + "epoch": 0.08, + "learning_rate": 4.978789026802419e-05, + "loss": 1.5006, + "step": 280 + }, + { + "epoch": 0.09, + "learning_rate": 4.9772492455650494e-05, + "loss": 1.4885, + "step": 290 + }, + { + "epoch": 0.09, + "learning_rate": 4.975655771222855e-05, + "loss": 1.4898, + "step": 300 + }, + { + "epoch": 0.09, + "learning_rate": 4.9740086383135706e-05, + "loss": 1.4906, + "step": 310 + }, + { + "epoch": 0.09, + "learning_rate": 4.97230788253796e-05, + "loss": 1.4796, + "step": 320 + }, + { + "epoch": 0.1, + "learning_rate": 4.970553540759028e-05, + "loss": 1.4861, + "step": 330 + }, + { + "epoch": 0.1, + "learning_rate": 4.968745651001231e-05, + "loss": 1.4827, + "step": 340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9668842524496526e-05, + "loss": 1.4884, + "step": 350 + }, + { + "epoch": 0.11, + "learning_rate": 4.964969385449149e-05, + "loss": 1.4873, + "step": 360 + }, + { + "epoch": 0.11, + "learning_rate": 4.96300109150348e-05, + "loss": 1.4848, + "step": 370 + }, + { + "epoch": 0.11, + "learning_rate": 4.960979413274404e-05, + "loss": 1.4881, + "step": 380 + }, + { + "epoch": 0.12, + "learning_rate": 4.9589043945807594e-05, + "loss": 1.4618, + "step": 390 + }, + { + "epoch": 0.12, + "learning_rate": 4.9567760803975105e-05, + "loss": 1.4858, + "step": 400 + }, + { + "epoch": 0.12, + "learning_rate": 4.954594516854773e-05, + "loss": 1.4777, + "step": 410 + }, + { + "epoch": 0.12, + "learning_rate": 4.952359751236817e-05, + "loss": 1.4828, + "step": 420 + }, + { + "epoch": 0.13, + "learning_rate": 4.950071831981038e-05, + "loss": 1.4571, + "step": 430 + }, + { + "epoch": 0.13, + "learning_rate": 4.9477308086769117e-05, + "loss": 1.4724, + "step": 440 + }, + { + "epoch": 0.13, + "learning_rate": 4.945336732064915e-05, + "loss": 1.4771, + "step": 450 + }, + { + "epoch": 0.14, + "learning_rate": 4.9428896540354294e-05, + "loss": 1.4604, + "step": 460 + }, + { + "epoch": 0.14, + "learning_rate": 4.940389627627613e-05, + "loss": 1.4815, + "step": 470 + }, + { + "epoch": 0.14, + "learning_rate": 4.937836707028255e-05, + "loss": 1.4859, + "step": 480 + }, + { + "epoch": 0.15, + "learning_rate": 4.935230947570597e-05, + "loss": 1.4715, + "step": 490 + }, + { + "epoch": 0.15, + "learning_rate": 4.932572405733137e-05, + "loss": 1.4759, + "step": 500 + }, + { + "epoch": 0.15, + "learning_rate": 4.929861139138404e-05, + "loss": 1.4678, + "step": 510 + }, + { + "epoch": 0.15, + "learning_rate": 4.9270972065517083e-05, + "loss": 1.4754, + "step": 520 + }, + { + "epoch": 0.16, + "learning_rate": 4.924280667879869e-05, + "loss": 1.462, + "step": 530 + }, + { + "epoch": 0.16, + "learning_rate": 4.921411584169915e-05, + "loss": 1.4704, + "step": 540 + }, + { + "epoch": 0.16, + "learning_rate": 4.918490017607761e-05, + "loss": 1.4661, + "step": 550 + }, + { + "epoch": 0.17, + "learning_rate": 4.915516031516863e-05, + "loss": 1.471, + "step": 560 + }, + { + "epoch": 0.17, + "learning_rate": 4.912489690356841e-05, + "loss": 1.451, + "step": 570 + }, + { + "epoch": 0.17, + "learning_rate": 4.909411059722084e-05, + "loss": 1.4411, + "step": 580 + }, + { + "epoch": 0.17, + "learning_rate": 4.9062802063403316e-05, + "loss": 1.456, + "step": 590 + }, + { + "epoch": 0.18, + "learning_rate": 4.90309719807122e-05, + "loss": 1.4678, + "step": 600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8998621039048205e-05, + "loss": 1.479, + "step": 610 + }, + { + "epoch": 0.18, + "learning_rate": 4.896574993960136e-05, + "loss": 1.4471, + "step": 620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893235939483587e-05, + "loss": 1.453, + "step": 630 + }, + { + "epoch": 0.19, + "learning_rate": 4.8898450128474626e-05, + "loss": 1.4696, + "step": 640 + }, + { + "epoch": 0.19, + "learning_rate": 4.886402287548357e-05, + "loss": 1.4526, + "step": 650 + }, + { + "epoch": 0.2, + "learning_rate": 4.8829078382055725e-05, + "loss": 1.4429, + "step": 660 + }, + { + "epoch": 0.2, + "learning_rate": 4.8793617405595025e-05, + "loss": 1.4491, + "step": 670 + }, + { + "epoch": 0.2, + "learning_rate": 4.8757640714699924e-05, + "loss": 1.4411, + "step": 680 + }, + { + "epoch": 0.2, + "learning_rate": 4.872114908914671e-05, + "loss": 1.4543, + "step": 690 + }, + { + "epoch": 0.21, + "learning_rate": 4.8684143319872636e-05, + "loss": 1.4556, + "step": 700 + }, + { + "epoch": 0.21, + "learning_rate": 4.864662420895873e-05, + "loss": 1.4506, + "step": 710 + }, + { + "epoch": 0.21, + "learning_rate": 4.860859256961244e-05, + "loss": 1.4671, + "step": 720 + }, + { + "epoch": 0.22, + "learning_rate": 4.857004922615002e-05, + "loss": 1.4469, + "step": 730 + }, + { + "epoch": 0.22, + "learning_rate": 4.8530995013978645e-05, + "loss": 1.4554, + "step": 740 + }, + { + "epoch": 0.22, + "learning_rate": 4.84914307795783e-05, + "loss": 1.4671, + "step": 750 + }, + { + "epoch": 0.23, + "learning_rate": 4.845135738048343e-05, + "loss": 1.445, + "step": 760 + }, + { + "epoch": 0.23, + "learning_rate": 4.841077568526439e-05, + "loss": 1.4469, + "step": 770 + }, + { + "epoch": 0.23, + "learning_rate": 4.836968657350857e-05, + "loss": 1.4677, + "step": 780 + }, + { + "epoch": 0.23, + "learning_rate": 4.832809093580135e-05, + "loss": 1.4653, + "step": 790 + }, + { + "epoch": 0.24, + "learning_rate": 4.8285989673706826e-05, + "loss": 1.4342, + "step": 800 + }, + { + "epoch": 0.24, + "learning_rate": 4.824338369974822e-05, + "loss": 1.458, + "step": 810 + }, + { + "epoch": 0.24, + "learning_rate": 4.8200273937388126e-05, + "loss": 1.4541, + "step": 820 + }, + { + "epoch": 0.25, + "learning_rate": 4.81566613210085e-05, + "loss": 1.4324, + "step": 830 + }, + { + "epoch": 0.25, + "learning_rate": 4.81125467958904e-05, + "loss": 1.4405, + "step": 840 + }, + { + "epoch": 0.25, + "learning_rate": 4.80679313181935e-05, + "loss": 1.4408, + "step": 850 + }, + { + "epoch": 0.25, + "learning_rate": 4.8022815854935356e-05, + "loss": 1.4395, + "step": 860 + }, + { + "epoch": 0.26, + "learning_rate": 4.797720138397045e-05, + "loss": 1.4359, + "step": 870 + }, + { + "epoch": 0.26, + "learning_rate": 4.793108889396902e-05, + "loss": 1.442, + "step": 880 + }, + { + "epoch": 0.26, + "learning_rate": 4.7884479384395594e-05, + "loss": 1.4566, + "step": 890 + }, + { + "epoch": 0.27, + "learning_rate": 4.7837373865487345e-05, + "loss": 1.4257, + "step": 900 + }, + { + "epoch": 0.27, + "learning_rate": 4.77897733582322e-05, + "loss": 1.4755, + "step": 910 + }, + { + "epoch": 0.27, + "learning_rate": 4.774167889434671e-05, + "loss": 1.4476, + "step": 920 + }, + { + "epoch": 0.28, + "learning_rate": 4.769309151625366e-05, + "loss": 1.4531, + "step": 930 + }, + { + "epoch": 0.28, + "learning_rate": 4.7644012277059516e-05, + "loss": 1.447, + "step": 940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7594442240531574e-05, + "loss": 1.4201, + "step": 950 + }, + { + "epoch": 0.28, + "learning_rate": 4.754438248107491e-05, + "loss": 1.4323, + "step": 960 + }, + { + "epoch": 0.29, + "learning_rate": 4.7493834083709104e-05, + "loss": 1.4432, + "step": 970 + }, + { + "epoch": 0.29, + "learning_rate": 4.7442798144044695e-05, + "loss": 1.4339, + "step": 980 + }, + { + "epoch": 0.29, + "learning_rate": 4.739127576825945e-05, + "loss": 1.4477, + "step": 990 + }, + { + "epoch": 0.3, + "learning_rate": 4.733926807307441e-05, + "loss": 1.4242, + "step": 1000 + }, + { + "epoch": 0.3, + "learning_rate": 4.728677618572965e-05, + "loss": 1.4341, + "step": 1010 + }, + { + "epoch": 0.3, + "learning_rate": 4.723380124395985e-05, + "loss": 1.4526, + "step": 1020 + }, + { + "epoch": 0.31, + "learning_rate": 4.7180344395969675e-05, + "loss": 1.4402, + "step": 1030 + }, + { + "epoch": 0.31, + "learning_rate": 4.712640680040884e-05, + "loss": 1.4257, + "step": 1040 + }, + { + "epoch": 0.31, + "learning_rate": 4.707198962634701e-05, + "loss": 1.4232, + "step": 1050 + }, + { + "epoch": 0.31, + "learning_rate": 4.70170940532485e-05, + "loss": 1.4485, + "step": 1060 + }, + { + "epoch": 0.32, + "learning_rate": 4.6961721270946635e-05, + "loss": 1.456, + "step": 1070 + }, + { + "epoch": 0.32, + "learning_rate": 4.690587247961804e-05, + "loss": 1.4555, + "step": 1080 + }, + { + "epoch": 0.32, + "learning_rate": 4.684954888975657e-05, + "loss": 1.4376, + "step": 1090 + }, + { + "epoch": 0.33, + "learning_rate": 4.6792751722147104e-05, + "loss": 1.4353, + "step": 1100 + }, + { + "epoch": 0.33, + "learning_rate": 4.6735482207839074e-05, + "loss": 1.4226, + "step": 1110 + }, + { + "epoch": 0.33, + "learning_rate": 4.6677741588119784e-05, + "loss": 1.4315, + "step": 1120 + }, + { + "epoch": 0.33, + "learning_rate": 4.66195311144875e-05, + "loss": 1.4303, + "step": 1130 + }, + { + "epoch": 0.34, + "learning_rate": 4.6560852048624345e-05, + "loss": 1.4288, + "step": 1140 + }, + { + "epoch": 0.34, + "learning_rate": 4.650170566236892e-05, + "loss": 1.4539, + "step": 1150 + }, + { + "epoch": 0.34, + "learning_rate": 4.6442093237688756e-05, + "loss": 1.4527, + "step": 1160 + }, + { + "epoch": 0.35, + "learning_rate": 4.6382016066652556e-05, + "loss": 1.4406, + "step": 1170 + }, + { + "epoch": 0.35, + "learning_rate": 4.632147545140212e-05, + "loss": 1.4233, + "step": 1180 + }, + { + "epoch": 0.35, + "learning_rate": 4.626047270412419e-05, + "loss": 1.426, + "step": 1190 + }, + { + "epoch": 0.36, + "learning_rate": 4.619900914702198e-05, + "loss": 1.4577, + "step": 1200 + }, + { + "epoch": 0.36, + "learning_rate": 4.613708611228652e-05, + "loss": 1.4313, + "step": 1210 + }, + { + "epoch": 0.36, + "learning_rate": 4.607470494206776e-05, + "loss": 1.4129, + "step": 1220 + }, + { + "epoch": 0.36, + "learning_rate": 4.601186698844554e-05, + "loss": 1.4368, + "step": 1230 + }, + { + "epoch": 0.37, + "learning_rate": 4.594857361340021e-05, + "loss": 1.4342, + "step": 1240 + }, + { + "epoch": 0.37, + "learning_rate": 4.588482618878316e-05, + "loss": 1.4438, + "step": 1250 + }, + { + "epoch": 0.37, + "learning_rate": 4.582062609628709e-05, + "loss": 1.4263, + "step": 1260 + }, + { + "epoch": 0.38, + "learning_rate": 4.575597472741601e-05, + "loss": 1.4379, + "step": 1270 + }, + { + "epoch": 0.38, + "learning_rate": 4.569087348345512e-05, + "loss": 1.4221, + "step": 1280 + }, + { + "epoch": 0.38, + "learning_rate": 4.562532377544046e-05, + "loss": 1.4414, + "step": 1290 + }, + { + "epoch": 0.39, + "learning_rate": 4.5559327024128265e-05, + "loss": 1.4395, + "step": 1300 + }, + { + "epoch": 0.39, + "learning_rate": 4.549288465996421e-05, + "loss": 1.4278, + "step": 1310 + }, + { + "epoch": 0.39, + "learning_rate": 4.542599812305243e-05, + "loss": 1.4344, + "step": 1320 + }, + { + "epoch": 0.39, + "learning_rate": 4.535866886312423e-05, + "loss": 1.4352, + "step": 1330 + }, + { + "epoch": 0.4, + "learning_rate": 4.529089833950675e-05, + "loss": 1.4133, + "step": 1340 + }, + { + "epoch": 0.4, + "learning_rate": 4.5222688021091266e-05, + "loss": 1.4506, + "step": 1350 + }, + { + "epoch": 0.4, + "learning_rate": 4.5154039386301385e-05, + "loss": 1.4295, + "step": 1360 + }, + { + "epoch": 0.41, + "learning_rate": 4.5084953923061016e-05, + "loss": 1.4389, + "step": 1370 + }, + { + "epoch": 0.41, + "learning_rate": 4.5015433128762065e-05, + "loss": 1.4247, + "step": 1380 + }, + { + "epoch": 0.41, + "learning_rate": 4.494547851023205e-05, + "loss": 1.4347, + "step": 1390 + }, + { + "epoch": 0.41, + "learning_rate": 4.487509158370139e-05, + "loss": 1.4133, + "step": 1400 + }, + { + "epoch": 0.42, + "learning_rate": 4.480427387477056e-05, + "loss": 1.4296, + "step": 1410 + }, + { + "epoch": 0.42, + "learning_rate": 4.473302691837702e-05, + "loss": 1.4353, + "step": 1420 + }, + { + "epoch": 0.42, + "learning_rate": 4.466135225876194e-05, + "loss": 1.4377, + "step": 1430 + }, + { + "epoch": 0.43, + "learning_rate": 4.458925144943676e-05, + "loss": 1.4168, + "step": 1440 + }, + { + "epoch": 0.43, + "learning_rate": 4.451672605314948e-05, + "loss": 1.4334, + "step": 1450 + }, + { + "epoch": 0.43, + "learning_rate": 4.444377764185082e-05, + "loss": 1.44, + "step": 1460 + }, + { + "epoch": 0.44, + "learning_rate": 4.43704077966601e-05, + "loss": 1.4375, + "step": 1470 + }, + { + "epoch": 0.44, + "learning_rate": 4.4296618107831036e-05, + "loss": 1.447, + "step": 1480 + }, + { + "epoch": 0.44, + "learning_rate": 4.422241017471722e-05, + "loss": 1.4151, + "step": 1490 + }, + { + "epoch": 0.44, + "learning_rate": 4.414778560573749e-05, + "loss": 1.4388, + "step": 1500 + }, + { + "epoch": 0.45, + "learning_rate": 4.4072746018341036e-05, + "loss": 1.4228, + "step": 1510 + }, + { + "epoch": 0.45, + "learning_rate": 4.399729303897238e-05, + "loss": 1.4104, + "step": 1520 + }, + { + "epoch": 0.45, + "learning_rate": 4.392142830303608e-05, + "loss": 1.4441, + "step": 1530 + }, + { + "epoch": 0.46, + "learning_rate": 4.384515345486131e-05, + "loss": 1.4282, + "step": 1540 + }, + { + "epoch": 0.46, + "learning_rate": 4.376847014766623e-05, + "loss": 1.4271, + "step": 1550 + }, + { + "epoch": 0.46, + "learning_rate": 4.369138004352212e-05, + "loss": 1.4223, + "step": 1560 + }, + { + "epoch": 0.47, + "learning_rate": 4.3613884813317406e-05, + "loss": 1.425, + "step": 1570 + }, + { + "epoch": 0.47, + "learning_rate": 4.3535986136721377e-05, + "loss": 1.4392, + "step": 1580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3457685702147834e-05, + "loss": 1.4097, + "step": 1590 + }, + { + "epoch": 0.47, + "learning_rate": 4.3378985206718484e-05, + "loss": 1.4405, + "step": 1600 + }, + { + "epoch": 0.48, + "learning_rate": 4.329988635622611e-05, + "loss": 1.4311, + "step": 1610 + }, + { + "epoch": 0.48, + "learning_rate": 4.322039086509769e-05, + "loss": 1.4358, + "step": 1620 + }, + { + "epoch": 0.48, + "learning_rate": 4.3140500456357145e-05, + "loss": 1.4114, + "step": 1630 + }, + { + "epoch": 0.49, + "learning_rate": 4.306021686158805e-05, + "loss": 1.4165, + "step": 1640 + }, + { + "epoch": 0.49, + "learning_rate": 4.297954182089609e-05, + "loss": 1.4309, + "step": 1650 + }, + { + "epoch": 0.49, + "learning_rate": 4.289847708287129e-05, + "loss": 1.4215, + "step": 1660 + }, + { + "epoch": 0.49, + "learning_rate": 4.2817024404550246e-05, + "loss": 1.4124, + "step": 1670 + }, + { + "epoch": 0.5, + "learning_rate": 4.2735185551377895e-05, + "loss": 1.4001, + "step": 1680 + }, + { + "epoch": 0.5, + "learning_rate": 4.265296229716935e-05, + "loss": 1.4302, + "step": 1690 + }, + { + "epoch": 0.5, + "learning_rate": 4.25703564240714e-05, + "loss": 1.4211, + "step": 1700 + }, + { + "epoch": 0.51, + "learning_rate": 4.2487369722523906e-05, + "loss": 1.4423, + "step": 1710 + }, + { + "epoch": 0.51, + "learning_rate": 4.240400399122101e-05, + "loss": 1.4299, + "step": 1720 + }, + { + "epoch": 0.51, + "learning_rate": 4.232026103707209e-05, + "loss": 1.4214, + "step": 1730 + }, + { + "epoch": 0.52, + "learning_rate": 4.223614267516268e-05, + "loss": 1.4348, + "step": 1740 + }, + { + "epoch": 0.52, + "learning_rate": 4.215165072871505e-05, + "loss": 1.4315, + "step": 1750 + }, + { + "epoch": 0.52, + "learning_rate": 4.206678702904874e-05, + "loss": 1.4098, + "step": 1760 + }, + { + "epoch": 0.52, + "learning_rate": 4.198155341554084e-05, + "loss": 1.4242, + "step": 1770 + }, + { + "epoch": 0.53, + "learning_rate": 4.1895951735586145e-05, + "loss": 1.4272, + "step": 1780 + }, + { + "epoch": 0.53, + "learning_rate": 4.1809983844557085e-05, + "loss": 1.4452, + "step": 1790 + }, + { + "epoch": 0.53, + "learning_rate": 4.172365160576355e-05, + "loss": 1.431, + "step": 1800 + }, + { + "epoch": 0.54, + "learning_rate": 4.163695689041245e-05, + "loss": 1.4389, + "step": 1810 + }, + { + "epoch": 0.54, + "learning_rate": 4.154990157756722e-05, + "loss": 1.413, + "step": 1820 + }, + { + "epoch": 0.54, + "learning_rate": 4.1462487554107036e-05, + "loss": 1.3893, + "step": 1830 + }, + { + "epoch": 0.55, + "learning_rate": 4.137471671468596e-05, + "loss": 1.4052, + "step": 1840 + }, + { + "epoch": 0.55, + "learning_rate": 4.128659096169183e-05, + "loss": 1.4173, + "step": 1850 + }, + { + "epoch": 0.55, + "learning_rate": 4.1198112205205096e-05, + "loss": 1.4012, + "step": 1860 + }, + { + "epoch": 0.55, + "learning_rate": 4.110928236295734e-05, + "loss": 1.4119, + "step": 1870 + }, + { + "epoch": 0.56, + "learning_rate": 4.102010336028975e-05, + "loss": 1.4111, + "step": 1880 + }, + { + "epoch": 0.56, + "learning_rate": 4.0930577130111424e-05, + "loss": 1.4156, + "step": 1890 + }, + { + "epoch": 0.56, + "learning_rate": 4.084070561285739e-05, + "loss": 1.4419, + "step": 1900 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750490756446624e-05, + "loss": 1.4121, + "step": 1910 + }, + { + "epoch": 0.57, + "learning_rate": 4.0659934516239795e-05, + "loss": 1.4204, + "step": 1920 + }, + { + "epoch": 0.57, + "learning_rate": 4.056903885499689e-05, + "loss": 1.4032, + "step": 1930 + }, + { + "epoch": 0.57, + "learning_rate": 4.047780574283466e-05, + "loss": 1.4207, + "step": 1940 + }, + { + "epoch": 0.58, + "learning_rate": 4.038623715718397e-05, + "loss": 1.4095, + "step": 1950 + }, + { + "epoch": 0.58, + "learning_rate": 4.029433508274686e-05, + "loss": 1.4228, + "step": 1960 + }, + { + "epoch": 0.58, + "learning_rate": 4.0202101511453586e-05, + "loss": 1.4141, + "step": 1970 + }, + { + "epoch": 0.59, + "learning_rate": 4.010953844241943e-05, + "loss": 1.4323, + "step": 1980 + }, + { + "epoch": 0.59, + "learning_rate": 4.001664788190135e-05, + "loss": 1.4087, + "step": 1990 + }, + { + "epoch": 0.59, + "learning_rate": 3.992343184325453e-05, + "loss": 1.4186, + "step": 2000 + }, + { + "epoch": 0.6, + "learning_rate": 3.982989234688873e-05, + "loss": 1.4264, + "step": 2010 + }, + { + "epoch": 0.6, + "learning_rate": 3.973603142022448e-05, + "loss": 1.4417, + "step": 2020 + }, + { + "epoch": 0.6, + "learning_rate": 3.964185109764915e-05, + "loss": 1.4075, + "step": 2030 + }, + { + "epoch": 0.6, + "learning_rate": 3.954735342047285e-05, + "loss": 1.4143, + "step": 2040 + }, + { + "epoch": 0.61, + "learning_rate": 3.945254043688419e-05, + "loss": 1.4176, + "step": 2050 + }, + { + "epoch": 0.61, + "learning_rate": 3.935741420190587e-05, + "loss": 1.4214, + "step": 2060 + }, + { + "epoch": 0.61, + "learning_rate": 3.926197677735018e-05, + "loss": 1.4256, + "step": 2070 + }, + { + "epoch": 0.62, + "learning_rate": 3.9166230231774276e-05, + "loss": 1.4075, + "step": 2080 + }, + { + "epoch": 0.62, + "learning_rate": 3.9070176640435335e-05, + "loss": 1.3887, + "step": 2090 + }, + { + "epoch": 0.62, + "learning_rate": 3.897381808524562e-05, + "loss": 1.4225, + "step": 2100 + }, + { + "epoch": 0.63, + "learning_rate": 3.887715665472729e-05, + "loss": 1.4114, + "step": 2110 + }, + { + "epoch": 0.63, + "learning_rate": 3.8780194443967226e-05, + "loss": 1.4316, + "step": 2120 + }, + { + "epoch": 0.63, + "learning_rate": 3.8682933554571524e-05, + "loss": 1.4168, + "step": 2130 + }, + { + "epoch": 0.63, + "learning_rate": 3.858537609461999e-05, + "loss": 1.4237, + "step": 2140 + }, + { + "epoch": 0.64, + "learning_rate": 3.8487524178620464e-05, + "loss": 1.4373, + "step": 2150 + }, + { + "epoch": 0.64, + "learning_rate": 3.838937992746295e-05, + "loss": 1.4089, + "step": 2160 + }, + { + "epoch": 0.64, + "learning_rate": 3.8290945468373684e-05, + "loss": 1.4319, + "step": 2170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8192222934869e-05, + "loss": 1.4035, + "step": 2180 + }, + { + "epoch": 0.65, + "learning_rate": 3.809321446670909e-05, + "loss": 1.4161, + "step": 2190 + }, + { + "epoch": 0.65, + "learning_rate": 3.799392220985164e-05, + "loss": 1.4136, + "step": 2200 + }, + { + "epoch": 0.65, + "learning_rate": 3.789434831640533e-05, + "loss": 1.4188, + "step": 2210 + }, + { + "epoch": 0.66, + "learning_rate": 3.779449494458312e-05, + "loss": 1.4203, + "step": 2220 + }, + { + "epoch": 0.66, + "learning_rate": 3.769436425865557e-05, + "loss": 1.4263, + "step": 2230 + }, + { + "epoch": 0.66, + "learning_rate": 3.759395842890384e-05, + "loss": 1.4295, + "step": 2240 + }, + { + "epoch": 0.67, + "learning_rate": 3.749327963157274e-05, + "loss": 1.4144, + "step": 2250 + }, + { + "epoch": 0.67, + "learning_rate": 3.739233004882346e-05, + "loss": 1.4162, + "step": 2260 + }, + { + "epoch": 0.67, + "learning_rate": 3.729111186868635e-05, + "loss": 1.4099, + "step": 2270 + }, + { + "epoch": 0.68, + "learning_rate": 3.718962728501348e-05, + "loss": 1.3878, + "step": 2280 + }, + { + "epoch": 0.68, + "learning_rate": 3.708787849743106e-05, + "loss": 1.4399, + "step": 2290 + }, + { + "epoch": 0.68, + "learning_rate": 3.69858677112918e-05, + "loss": 1.4249, + "step": 2300 + }, + { + "epoch": 0.68, + "learning_rate": 3.688359713762707e-05, + "loss": 1.3925, + "step": 2310 + }, + { + "epoch": 0.69, + "learning_rate": 3.6781068993099034e-05, + "loss": 1.4036, + "step": 2320 + }, + { + "epoch": 0.69, + "learning_rate": 3.667828549995255e-05, + "loss": 1.3986, + "step": 2330 + }, + { + "epoch": 0.69, + "learning_rate": 3.657524888596703e-05, + "loss": 1.4298, + "step": 2340 + }, + { + "epoch": 0.7, + "learning_rate": 3.6471961384408155e-05, + "loss": 1.4016, + "step": 2350 + }, + { + "epoch": 0.7, + "learning_rate": 3.636842523397945e-05, + "loss": 1.3992, + "step": 2360 + }, + { + "epoch": 0.7, + "learning_rate": 3.626464267877381e-05, + "loss": 1.4441, + "step": 2370 + }, + { + "epoch": 0.71, + "learning_rate": 3.616061596822478e-05, + "loss": 1.3967, + "step": 2380 + }, + { + "epoch": 0.71, + "learning_rate": 3.6056347357057893e-05, + "loss": 1.4252, + "step": 2390 + }, + { + "epoch": 0.71, + "learning_rate": 3.595183910524173e-05, + "loss": 1.4209, + "step": 2400 + }, + { + "epoch": 0.71, + "learning_rate": 3.5847093477938956e-05, + "loss": 1.4133, + "step": 2410 + }, + { + "epoch": 0.72, + "learning_rate": 3.5742112745457235e-05, + "loss": 1.4313, + "step": 2420 + }, + { + "epoch": 0.72, + "learning_rate": 3.563689918320002e-05, + "loss": 1.4275, + "step": 2430 + }, + { + "epoch": 0.72, + "learning_rate": 3.5531455071617226e-05, + "loss": 1.421, + "step": 2440 + }, + { + "epoch": 0.73, + "learning_rate": 3.542578269615579e-05, + "loss": 1.4402, + "step": 2450 + }, + { + "epoch": 0.73, + "learning_rate": 3.5319884347210186e-05, + "loss": 1.4176, + "step": 2460 + }, + { + "epoch": 0.73, + "learning_rate": 3.521376232007271e-05, + "loss": 1.4117, + "step": 2470 + }, + { + "epoch": 0.73, + "learning_rate": 3.5107418914883794e-05, + "loss": 1.41, + "step": 2480 + }, + { + "epoch": 0.74, + "learning_rate": 3.500085643658211e-05, + "loss": 1.4313, + "step": 2490 + }, + { + "epoch": 0.74, + "learning_rate": 3.489407719485464e-05, + "loss": 1.4035, + "step": 2500 + }, + { + "epoch": 0.74, + "learning_rate": 3.4787083504086605e-05, + "loss": 1.4057, + "step": 2510 + }, + { + "epoch": 0.75, + "learning_rate": 3.467987768331127e-05, + "loss": 1.4125, + "step": 2520 + }, + { + "epoch": 0.75, + "learning_rate": 3.457246205615974e-05, + "loss": 1.4056, + "step": 2530 + }, + { + "epoch": 0.75, + "learning_rate": 3.446483895081054e-05, + "loss": 1.4082, + "step": 2540 + }, + { + "epoch": 0.76, + "learning_rate": 3.4357010699939215e-05, + "loss": 1.3915, + "step": 2550 + }, + { + "epoch": 0.76, + "learning_rate": 3.424897964066769e-05, + "loss": 1.4012, + "step": 2560 + }, + { + "epoch": 0.76, + "learning_rate": 3.4140748114513685e-05, + "loss": 1.4251, + "step": 2570 + }, + { + "epoch": 0.76, + "learning_rate": 3.403231846733994e-05, + "loss": 1.4013, + "step": 2580 + }, + { + "epoch": 0.77, + "learning_rate": 3.392369304930334e-05, + "loss": 1.4076, + "step": 2590 + }, + { + "epoch": 0.77, + "learning_rate": 3.3814874214804034e-05, + "loss": 1.3978, + "step": 2600 + }, + { + "epoch": 0.77, + "learning_rate": 3.3705864322434354e-05, + "loss": 1.408, + "step": 2610 + }, + { + "epoch": 0.78, + "learning_rate": 3.359666573492772e-05, + "loss": 1.3888, + "step": 2620 + }, + { + "epoch": 0.78, + "learning_rate": 3.3487280819107415e-05, + "loss": 1.4052, + "step": 2630 + }, + { + "epoch": 0.78, + "learning_rate": 3.33777119458353e-05, + "loss": 1.4286, + "step": 2640 + }, + { + "epoch": 0.79, + "learning_rate": 3.326796148996042e-05, + "loss": 1.4241, + "step": 2650 + }, + { + "epoch": 0.79, + "learning_rate": 3.315803183026753e-05, + "loss": 1.4049, + "step": 2660 + }, + { + "epoch": 0.79, + "learning_rate": 3.304792534942553e-05, + "loss": 1.3826, + "step": 2670 + }, + { + "epoch": 0.79, + "learning_rate": 3.293764443393582e-05, + "loss": 1.413, + "step": 2680 + }, + { + "epoch": 0.8, + "learning_rate": 3.2827191474080605e-05, + "loss": 1.4161, + "step": 2690 + }, + { + "epoch": 0.8, + "learning_rate": 3.2716568863871044e-05, + "loss": 1.382, + "step": 2700 + }, + { + "epoch": 0.8, + "learning_rate": 3.260577900099539e-05, + "loss": 1.381, + "step": 2710 + }, + { + "epoch": 0.81, + "learning_rate": 3.2494824286767e-05, + "loss": 1.396, + "step": 2720 + }, + { + "epoch": 0.81, + "learning_rate": 3.2383707126072315e-05, + "loss": 1.3923, + "step": 2730 + }, + { + "epoch": 0.81, + "learning_rate": 3.2272429927318707e-05, + "loss": 1.4044, + "step": 2740 + }, + { + "epoch": 0.81, + "learning_rate": 3.21609951023823e-05, + "loss": 1.4073, + "step": 2750 + }, + { + "epoch": 0.82, + "learning_rate": 3.204940506655568e-05, + "loss": 1.4178, + "step": 2760 + }, + { + "epoch": 0.82, + "learning_rate": 3.1937662238495544e-05, + "loss": 1.4179, + "step": 2770 + }, + { + "epoch": 0.82, + "learning_rate": 3.1825769040170285e-05, + "loss": 1.4003, + "step": 2780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1713727896807505e-05, + "loss": 1.4176, + "step": 2790 + }, + { + "epoch": 0.83, + "learning_rate": 3.160154123684143e-05, + "loss": 1.4179, + "step": 2800 + }, + { + "epoch": 0.83, + "learning_rate": 3.1489211491860276e-05, + "loss": 1.4098, + "step": 2810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1376741096553576e-05, + "loss": 1.4087, + "step": 2820 + }, + { + "epoch": 0.84, + "learning_rate": 3.126413248865935e-05, + "loss": 1.3971, + "step": 2830 + }, + { + "epoch": 0.84, + "learning_rate": 3.115138810891134e-05, + "loss": 1.3915, + "step": 2840 + }, + { + "epoch": 0.84, + "learning_rate": 3.103851040098607e-05, + "loss": 1.4041, + "step": 2850 + }, + { + "epoch": 0.85, + "learning_rate": 3.0925501811449855e-05, + "loss": 1.4129, + "step": 2860 + }, + { + "epoch": 0.85, + "learning_rate": 3.081236478970583e-05, + "loss": 1.3948, + "step": 2870 + }, + { + "epoch": 0.85, + "learning_rate": 3.069910178794082e-05, + "loss": 1.4116, + "step": 2880 + }, + { + "epoch": 0.86, + "learning_rate": 3.0585715261072206e-05, + "loss": 1.4029, + "step": 2890 + }, + { + "epoch": 0.86, + "learning_rate": 3.04722076666947e-05, + "loss": 1.399, + "step": 2900 + }, + { + "epoch": 0.86, + "learning_rate": 3.0358581465027125e-05, + "loss": 1.4061, + "step": 2910 + }, + { + "epoch": 0.87, + "learning_rate": 3.024483911885901e-05, + "loss": 1.4152, + "step": 2920 + }, + { + "epoch": 0.87, + "learning_rate": 3.013098309349729e-05, + "loss": 1.4257, + "step": 2930 + }, + { + "epoch": 0.87, + "learning_rate": 3.0017015856712814e-05, + "loss": 1.417, + "step": 2940 + }, + { + "epoch": 0.87, + "learning_rate": 2.9902939878686915e-05, + "loss": 1.3952, + "step": 2950 + }, + { + "epoch": 0.88, + "learning_rate": 2.978875763195779e-05, + "loss": 1.4252, + "step": 2960 + }, + { + "epoch": 0.88, + "learning_rate": 2.9674471591367005e-05, + "loss": 1.3982, + "step": 2970 + }, + { + "epoch": 0.88, + "learning_rate": 2.9560084234005765e-05, + "loss": 1.3948, + "step": 2980 + }, + { + "epoch": 0.89, + "learning_rate": 2.944559803916128e-05, + "loss": 1.4127, + "step": 2990 + }, + { + "epoch": 0.89, + "learning_rate": 2.9331015488263024e-05, + "loss": 1.4239, + "step": 3000 + }, + { + "epoch": 0.89, + "learning_rate": 2.9216339064828914e-05, + "loss": 1.3889, + "step": 3010 + }, + { + "epoch": 0.89, + "learning_rate": 2.910157125441152e-05, + "loss": 1.403, + "step": 3020 + }, + { + "epoch": 0.9, + "learning_rate": 2.898671454454418e-05, + "loss": 1.4106, + "step": 3030 + }, + { + "epoch": 0.9, + "learning_rate": 2.8871771424687078e-05, + "loss": 1.4123, + "step": 3040 + }, + { + "epoch": 0.9, + "learning_rate": 2.8756744386173284e-05, + "loss": 1.4137, + "step": 3050 + }, + { + "epoch": 0.91, + "learning_rate": 2.8641635922154774e-05, + "loss": 1.4009, + "step": 3060 + }, + { + "epoch": 0.91, + "learning_rate": 2.8526448527548372e-05, + "loss": 1.4159, + "step": 3070 + }, + { + "epoch": 0.91, + "learning_rate": 2.8411184698981684e-05, + "loss": 1.4071, + "step": 3080 + }, + { + "epoch": 0.92, + "learning_rate": 2.829584693473899e-05, + "loss": 1.41, + "step": 3090 + }, + { + "epoch": 0.92, + "learning_rate": 2.8180437734707064e-05, + "loss": 1.4038, + "step": 3100 + }, + { + "epoch": 0.92, + "learning_rate": 2.8064959600321043e-05, + "loss": 1.4069, + "step": 3110 + }, + { + "epoch": 0.92, + "learning_rate": 2.7949415034510163e-05, + "loss": 1.4096, + "step": 3120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7833806541643544e-05, + "loss": 1.3821, + "step": 3130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7718136627475865e-05, + "loss": 1.3886, + "step": 3140 + }, + { + "epoch": 0.93, + "learning_rate": 2.76024077990931e-05, + "loss": 1.3923, + "step": 3150 + }, + { + "epoch": 0.94, + "learning_rate": 2.748662256485816e-05, + "loss": 1.4072, + "step": 3160 + }, + { + "epoch": 0.94, + "learning_rate": 2.7370783434356512e-05, + "loss": 1.4126, + "step": 3170 + }, + { + "epoch": 0.94, + "learning_rate": 2.7254892918341802e-05, + "loss": 1.4238, + "step": 3180 + }, + { + "epoch": 0.95, + "learning_rate": 2.713895352868144e-05, + "loss": 1.4183, + "step": 3190 + }, + { + "epoch": 0.95, + "learning_rate": 2.702296777830212e-05, + "loss": 1.4056, + "step": 3200 + }, + { + "epoch": 0.95, + "learning_rate": 2.6906938181135423e-05, + "loss": 1.4096, + "step": 3210 + }, + { + "epoch": 0.95, + "learning_rate": 2.6790867252063247e-05, + "loss": 1.4018, + "step": 3220 + }, + { + "epoch": 0.96, + "learning_rate": 2.6674757506863357e-05, + "loss": 1.3922, + "step": 3230 + }, + { + "epoch": 0.96, + "learning_rate": 2.655861146215483e-05, + "loss": 1.4054, + "step": 3240 + }, + { + "epoch": 0.96, + "learning_rate": 2.6442431635343528e-05, + "loss": 1.3914, + "step": 3250 + }, + { + "epoch": 0.97, + "learning_rate": 2.6326220544567514e-05, + "loss": 1.3851, + "step": 3260 + }, + { + "epoch": 0.97, + "learning_rate": 2.620998070864248e-05, + "loss": 1.4102, + "step": 3270 + }, + { + "epoch": 0.97, + "learning_rate": 2.6093714647007156e-05, + "loss": 1.4069, + "step": 3280 + }, + { + "epoch": 0.97, + "learning_rate": 2.5977424879668705e-05, + "loss": 1.3919, + "step": 3290 + }, + { + "epoch": 0.98, + "learning_rate": 2.5861113927148096e-05, + "loss": 1.4073, + "step": 3300 + }, + { + "epoch": 0.98, + "learning_rate": 2.5744784310425467e-05, + "loss": 1.4025, + "step": 3310 + }, + { + "epoch": 0.98, + "learning_rate": 2.562843855088551e-05, + "loss": 1.3805, + "step": 3320 + }, + { + "epoch": 0.99, + "learning_rate": 2.5512079170262793e-05, + "loss": 1.4032, + "step": 3330 + }, + { + "epoch": 0.99, + "learning_rate": 2.5395708690587117e-05, + "loss": 1.4232, + "step": 3340 + }, + { + "epoch": 0.99, + "learning_rate": 2.527932963412885e-05, + "loss": 1.3897, + "step": 3350 + }, + { + "epoch": 1.0, + "learning_rate": 2.5162944523344256e-05, + "loss": 1.4008, + "step": 3360 + }, + { + "epoch": 1.0, + "learning_rate": 2.5046555880820826e-05, + "loss": 1.3936, + "step": 3370 + }, + { + "epoch": 1.0, + "learning_rate": 2.4930166229222597e-05, + "loss": 1.394, + "step": 3380 + }, + { + "epoch": 1.0, + "learning_rate": 2.481377809123547e-05, + "loss": 1.3903, + "step": 3390 + }, + { + "epoch": 1.01, + "learning_rate": 2.469739398951256e-05, + "loss": 1.3869, + "step": 3400 + }, + { + "epoch": 1.01, + "learning_rate": 2.458101644661947e-05, + "loss": 1.429, + "step": 3410 + }, + { + "epoch": 1.01, + "learning_rate": 2.4464647984979667e-05, + "loss": 1.3987, + "step": 3420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4348291126819783e-05, + "loss": 1.38, + "step": 3430 + }, + { + "epoch": 1.02, + "learning_rate": 2.4231948394114936e-05, + "loss": 1.3906, + "step": 3440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4115622308534096e-05, + "loss": 1.3931, + "step": 3450 + }, + { + "epoch": 1.03, + "learning_rate": 2.399931539138541e-05, + "loss": 1.4135, + "step": 3460 + }, + { + "epoch": 1.03, + "learning_rate": 2.388303016356156e-05, + "loss": 1.3952, + "step": 3470 + }, + { + "epoch": 1.03, + "learning_rate": 2.3766769145485125e-05, + "loss": 1.3972, + "step": 3480 + }, + { + "epoch": 1.03, + "learning_rate": 2.3650534857053943e-05, + "loss": 1.3937, + "step": 3490 + }, + { + "epoch": 1.04, + "learning_rate": 2.3534329817586513e-05, + "loss": 1.3936, + "step": 3500 + }, + { + "epoch": 1.04, + "learning_rate": 2.3418156545767365e-05, + "loss": 1.397, + "step": 3510 + }, + { + "epoch": 1.04, + "learning_rate": 2.3302017559592494e-05, + "loss": 1.3849, + "step": 3520 + }, + { + "epoch": 1.05, + "learning_rate": 2.318591537631476e-05, + "loss": 1.4118, + "step": 3530 + }, + { + "epoch": 1.05, + "learning_rate": 2.3069852512389335e-05, + "loss": 1.414, + "step": 3540 + }, + { + "epoch": 1.05, + "learning_rate": 2.2953831483419184e-05, + "loss": 1.4088, + "step": 3550 + }, + { + "epoch": 1.05, + "learning_rate": 2.2837854804100504e-05, + "loss": 1.3773, + "step": 3560 + }, + { + "epoch": 1.06, + "learning_rate": 2.272192498816825e-05, + "loss": 1.3977, + "step": 3570 + }, + { + "epoch": 1.06, + "learning_rate": 2.260604454834162e-05, + "loss": 1.3591, + "step": 3580 + }, + { + "epoch": 1.06, + "learning_rate": 2.2490215996269617e-05, + "loss": 1.4023, + "step": 3590 + }, + { + "epoch": 1.07, + "learning_rate": 2.237444184247661e-05, + "loss": 1.3873, + "step": 3600 + }, + { + "epoch": 1.07, + "learning_rate": 2.2258724596307915e-05, + "loss": 1.3826, + "step": 3610 + }, + { + "epoch": 1.07, + "learning_rate": 2.214306676587539e-05, + "loss": 1.3732, + "step": 3620 + }, + { + "epoch": 1.08, + "learning_rate": 2.2027470858003098e-05, + "loss": 1.3988, + "step": 3630 + }, + { + "epoch": 1.08, + "learning_rate": 2.1911939378172956e-05, + "loss": 1.4036, + "step": 3640 + }, + { + "epoch": 1.08, + "learning_rate": 2.1796474830470447e-05, + "loss": 1.4236, + "step": 3650 + }, + { + "epoch": 1.08, + "learning_rate": 2.1681079717530328e-05, + "loss": 1.4032, + "step": 3660 + }, + { + "epoch": 1.09, + "learning_rate": 2.156575654048239e-05, + "loss": 1.39, + "step": 3670 + }, + { + "epoch": 1.09, + "learning_rate": 2.145050779889725e-05, + "loss": 1.3757, + "step": 3680 + }, + { + "epoch": 1.09, + "learning_rate": 2.1335335990732186e-05, + "loss": 1.3934, + "step": 3690 + }, + { + "epoch": 1.1, + "learning_rate": 2.1220243612276964e-05, + "loss": 1.3979, + "step": 3700 + }, + { + "epoch": 1.1, + "learning_rate": 2.110523315809978e-05, + "loss": 1.4181, + "step": 3710 + }, + { + "epoch": 1.1, + "learning_rate": 2.0990307120993134e-05, + "loss": 1.406, + "step": 3720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0875467991919854e-05, + "loss": 1.4036, + "step": 3730 + }, + { + "epoch": 1.11, + "learning_rate": 2.076071825995906e-05, + "loss": 1.4095, + "step": 3740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0646060412252246e-05, + "loss": 1.4048, + "step": 3750 + }, + { + "epoch": 1.11, + "learning_rate": 2.0531496933949363e-05, + "loss": 1.3874, + "step": 3760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417030308154953e-05, + "loss": 1.3793, + "step": 3770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0302663015874322e-05, + "loss": 1.4152, + "step": 3780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0188397535959785e-05, + "loss": 1.3738, + "step": 3790 + }, + { + "epoch": 1.13, + "learning_rate": 2.007423634505692e-05, + "loss": 1.4033, + "step": 3800 + }, + { + "epoch": 1.13, + "learning_rate": 1.9960181917550897e-05, + "loss": 1.3753, + "step": 3810 + }, + { + "epoch": 1.13, + "learning_rate": 1.9846236725512835e-05, + "loss": 1.3791, + "step": 3820 + }, + { + "epoch": 1.13, + "learning_rate": 1.973240323864624e-05, + "loss": 1.3837, + "step": 3830 + }, + { + "epoch": 1.14, + "learning_rate": 1.9618683924233467e-05, + "loss": 1.3945, + "step": 3840 + }, + { + "epoch": 1.14, + "learning_rate": 1.9505081247082237e-05, + "loss": 1.3804, + "step": 3850 + }, + { + "epoch": 1.14, + "learning_rate": 1.9391597669472213e-05, + "loss": 1.3964, + "step": 3860 + }, + { + "epoch": 1.15, + "learning_rate": 1.927823565110165e-05, + "loss": 1.3983, + "step": 3870 + }, + { + "epoch": 1.15, + "learning_rate": 1.9164997649034058e-05, + "loss": 1.4169, + "step": 3880 + }, + { + "epoch": 1.15, + "learning_rate": 1.9051886117644963e-05, + "loss": 1.4101, + "step": 3890 + }, + { + "epoch": 1.16, + "learning_rate": 1.89389035085687e-05, + "loss": 1.3823, + "step": 3900 + }, + { + "epoch": 1.16, + "learning_rate": 1.8826052270645276e-05, + "loss": 1.3827, + "step": 3910 + }, + { + "epoch": 1.16, + "learning_rate": 1.8713334849867315e-05, + "loss": 1.4035, + "step": 3920 + }, + { + "epoch": 1.16, + "learning_rate": 1.8600753689327e-05, + "loss": 1.4081, + "step": 3930 + }, + { + "epoch": 1.17, + "learning_rate": 1.8488311229163152e-05, + "loss": 1.3919, + "step": 3940 + }, + { + "epoch": 1.17, + "learning_rate": 1.8376009906508338e-05, + "loss": 1.3854, + "step": 3950 + }, + { + "epoch": 1.17, + "learning_rate": 1.826385215543603e-05, + "loss": 1.3924, + "step": 3960 + }, + { + "epoch": 1.18, + "learning_rate": 1.8151840406907873e-05, + "loss": 1.3851, + "step": 3970 + }, + { + "epoch": 1.18, + "learning_rate": 1.8039977088720972e-05, + "loss": 1.3707, + "step": 3980 + }, + { + "epoch": 1.18, + "learning_rate": 1.7928264625455282e-05, + "loss": 1.3998, + "step": 3990 + }, + { + "epoch": 1.19, + "learning_rate": 1.7816705438421064e-05, + "loss": 1.3931, + "step": 4000 + }, + { + "epoch": 1.19, + "learning_rate": 1.7705301945606384e-05, + "loss": 1.3976, + "step": 4010 + }, + { + "epoch": 1.19, + "learning_rate": 1.7594056561624716e-05, + "loss": 1.3785, + "step": 4020 + }, + { + "epoch": 1.19, + "learning_rate": 1.748297169766262e-05, + "loss": 1.3845, + "step": 4030 + }, + { + "epoch": 1.2, + "learning_rate": 1.7372049761427457e-05, + "loss": 1.3926, + "step": 4040 + }, + { + "epoch": 1.2, + "learning_rate": 1.7261293157095204e-05, + "loss": 1.4075, + "step": 4050 + }, + { + "epoch": 1.2, + "learning_rate": 1.7150704285258375e-05, + "loss": 1.3938, + "step": 4060 + }, + { + "epoch": 1.21, + "learning_rate": 1.7040285542873945e-05, + "loss": 1.3884, + "step": 4070 + }, + { + "epoch": 1.21, + "learning_rate": 1.6930039323211448e-05, + "loss": 1.4066, + "step": 4080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6819968015801048e-05, + "loss": 1.3992, + "step": 4090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6710074006381797e-05, + "loss": 1.4066, + "step": 4100 + }, + { + "epoch": 1.22, + "learning_rate": 1.6600359676849892e-05, + "loss": 1.4076, + "step": 4110 + }, + { + "epoch": 1.22, + "learning_rate": 1.6490827405207062e-05, + "loss": 1.4078, + "step": 4120 + }, + { + "epoch": 1.22, + "learning_rate": 1.638147956550904e-05, + "loss": 1.4026, + "step": 4130 + }, + { + "epoch": 1.23, + "learning_rate": 1.627231852781407e-05, + "loss": 1.3861, + "step": 4140 + }, + { + "epoch": 1.23, + "learning_rate": 1.6163346658131567e-05, + "loss": 1.3915, + "step": 4150 + }, + { + "epoch": 1.23, + "learning_rate": 1.6054566318370832e-05, + "loss": 1.3828, + "step": 4160 + }, + { + "epoch": 1.24, + "learning_rate": 1.5945979866289844e-05, + "loss": 1.3952, + "step": 4170 + }, + { + "epoch": 1.24, + "learning_rate": 1.583758965544417e-05, + "loss": 1.3892, + "step": 4180 + }, + { + "epoch": 1.24, + "learning_rate": 1.5729398035135957e-05, + "loss": 1.3973, + "step": 4190 + }, + { + "epoch": 1.24, + "learning_rate": 1.5621407350362986e-05, + "loss": 1.4225, + "step": 4200 + }, + { + "epoch": 1.25, + "learning_rate": 1.5513619941767886e-05, + "loss": 1.3948, + "step": 4210 + }, + { + "epoch": 1.25, + "learning_rate": 1.540603814558736e-05, + "loss": 1.4074, + "step": 4220 + }, + { + "epoch": 1.25, + "learning_rate": 1.5298664293601574e-05, + "loss": 1.3965, + "step": 4230 + }, + { + "epoch": 1.26, + "learning_rate": 1.5191500713083615e-05, + "loss": 1.3743, + "step": 4240 + }, + { + "epoch": 1.26, + "learning_rate": 1.508454972674904e-05, + "loss": 1.384, + "step": 4250 + }, + { + "epoch": 1.26, + "learning_rate": 1.4977813652705535e-05, + "loss": 1.4018, + "step": 4260 + }, + { + "epoch": 1.27, + "learning_rate": 1.4871294804402675e-05, + "loss": 1.3904, + "step": 4270 + }, + { + "epoch": 1.27, + "learning_rate": 1.4764995490581779e-05, + "loss": 1.3981, + "step": 4280 + }, + { + "epoch": 1.27, + "learning_rate": 1.465891801522587e-05, + "loss": 1.4144, + "step": 4290 + }, + { + "epoch": 1.27, + "learning_rate": 1.4553064677509731e-05, + "loss": 1.4172, + "step": 4300 + }, + { + "epoch": 1.28, + "learning_rate": 1.4447437771750078e-05, + "loss": 1.3873, + "step": 4310 + }, + { + "epoch": 1.28, + "learning_rate": 1.4342039587355832e-05, + "loss": 1.3983, + "step": 4320 + }, + { + "epoch": 1.28, + "learning_rate": 1.423687240877849e-05, + "loss": 1.4007, + "step": 4330 + }, + { + "epoch": 1.29, + "learning_rate": 1.4131938515462639e-05, + "loss": 1.4088, + "step": 4340 + }, + { + "epoch": 1.29, + "learning_rate": 1.4027240181796508e-05, + "loss": 1.3941, + "step": 4350 + }, + { + "epoch": 1.29, + "learning_rate": 1.3922779677062689e-05, + "loss": 1.3975, + "step": 4360 + }, + { + "epoch": 1.29, + "learning_rate": 1.3818559265388964e-05, + "loss": 1.3842, + "step": 4370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3714581205699214e-05, + "loss": 1.4011, + "step": 4380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3610847751664473e-05, + "loss": 1.3881, + "step": 4390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3507361151654067e-05, + "loss": 1.4028, + "step": 4400 + }, + { + "epoch": 1.31, + "learning_rate": 1.340412364868689e-05, + "loss": 1.3973, + "step": 4410 + }, + { + "epoch": 1.31, + "learning_rate": 1.3301137480382786e-05, + "loss": 1.445, + "step": 4420 + }, + { + "epoch": 1.31, + "learning_rate": 1.3198404878914044e-05, + "loss": 1.3957, + "step": 4430 + }, + { + "epoch": 1.32, + "learning_rate": 1.3095928070957037e-05, + "loss": 1.395, + "step": 4440 + }, + { + "epoch": 1.32, + "learning_rate": 1.2993709277643922e-05, + "loss": 1.4157, + "step": 4450 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891750714514545e-05, + "loss": 1.4074, + "step": 4460 + }, + { + "epoch": 1.32, + "learning_rate": 1.2790054591468381e-05, + "loss": 1.3988, + "step": 4470 + }, + { + "epoch": 1.33, + "learning_rate": 1.2688623112716652e-05, + "loss": 1.3914, + "step": 4480 + }, + { + "epoch": 1.33, + "learning_rate": 1.2587458476734559e-05, + "loss": 1.3864, + "step": 4490 + }, + { + "epoch": 1.33, + "learning_rate": 1.248656287621362e-05, + "loss": 1.3934, + "step": 4500 + }, + { + "epoch": 1.34, + "learning_rate": 1.2385938498014138e-05, + "loss": 1.3893, + "step": 4510 + }, + { + "epoch": 1.34, + "learning_rate": 1.2285587523117825e-05, + "loss": 1.3991, + "step": 4520 + }, + { + "epoch": 1.34, + "learning_rate": 1.2185512126580512e-05, + "loss": 1.376, + "step": 4530 + }, + { + "epoch": 1.35, + "learning_rate": 1.2085714477484997e-05, + "loss": 1.3799, + "step": 4540 + }, + { + "epoch": 1.35, + "learning_rate": 1.1986196738894078e-05, + "loss": 1.3738, + "step": 4550 + }, + { + "epoch": 1.35, + "learning_rate": 1.188696106780361e-05, + "loss": 1.3754, + "step": 4560 + }, + { + "epoch": 1.35, + "learning_rate": 1.178800961509578e-05, + "loss": 1.4006, + "step": 4570 + }, + { + "epoch": 1.36, + "learning_rate": 1.1689344525492497e-05, + "loss": 1.4012, + "step": 4580 + }, + { + "epoch": 1.36, + "learning_rate": 1.1590967937508895e-05, + "loss": 1.3973, + "step": 4590 + }, + { + "epoch": 1.36, + "learning_rate": 1.149288198340698e-05, + "loss": 1.3737, + "step": 4600 + }, + { + "epoch": 1.37, + "learning_rate": 1.1395088789149419e-05, + "loss": 1.3998, + "step": 4610 + }, + { + "epoch": 1.37, + "learning_rate": 1.1297590474353464e-05, + "loss": 1.4053, + "step": 4620 + }, + { + "epoch": 1.37, + "learning_rate": 1.1200389152245003e-05, + "loss": 1.4038, + "step": 4630 + }, + { + "epoch": 1.37, + "learning_rate": 1.1103486929612759e-05, + "loss": 1.3968, + "step": 4640 + }, + { + "epoch": 1.38, + "learning_rate": 1.1006885906762626e-05, + "loss": 1.4037, + "step": 4650 + }, + { + "epoch": 1.38, + "learning_rate": 1.0910588177472153e-05, + "loss": 1.3901, + "step": 4660 + }, + { + "epoch": 1.38, + "learning_rate": 1.0814595828945154e-05, + "loss": 1.379, + "step": 4670 + }, + { + "epoch": 1.39, + "learning_rate": 1.0718910941766478e-05, + "loss": 1.3808, + "step": 4680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0623535589856887e-05, + "loss": 1.4105, + "step": 4690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0528471840428142e-05, + "loss": 1.3756, + "step": 4700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0433721753938182e-05, + "loss": 1.3708, + "step": 4710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0339287384046462e-05, + "loss": 1.3924, + "step": 4720 + }, + { + "epoch": 1.4, + "learning_rate": 1.024517077756943e-05, + "loss": 1.3854, + "step": 4730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0151373974436184e-05, + "loss": 1.3908, + "step": 4740 + }, + { + "epoch": 1.41, + "learning_rate": 1.0057899007644245e-05, + "loss": 1.3953, + "step": 4750 + }, + { + "epoch": 1.41, + "learning_rate": 9.964747903215513e-06, + "loss": 1.3933, + "step": 4760 + }, + { + "epoch": 1.41, + "learning_rate": 9.871922680152318e-06, + "loss": 1.3854, + "step": 4770 + }, + { + "epoch": 1.42, + "learning_rate": 9.779425350393685e-06, + "loss": 1.4026, + "step": 4780 + }, + { + "epoch": 1.42, + "learning_rate": 9.687257918771719e-06, + "loss": 1.3958, + "step": 4790 + }, + { + "epoch": 1.42, + "learning_rate": 9.595422382968156e-06, + "loss": 1.3777, + "step": 4800 + }, + { + "epoch": 1.43, + "learning_rate": 9.503920733471052e-06, + "loss": 1.3835, + "step": 4810 + }, + { + "epoch": 1.43, + "learning_rate": 9.412754953531663e-06, + "loss": 1.3768, + "step": 4820 + }, + { + "epoch": 1.43, + "learning_rate": 9.321927019121435e-06, + "loss": 1.3846, + "step": 4830 + }, + { + "epoch": 1.43, + "learning_rate": 9.231438898889184e-06, + "loss": 1.3878, + "step": 4840 + }, + { + "epoch": 1.44, + "learning_rate": 9.141292554118435e-06, + "loss": 1.38, + "step": 4850 + }, + { + "epoch": 1.44, + "learning_rate": 9.051489938684903e-06, + "loss": 1.3841, + "step": 4860 + }, + { + "epoch": 1.44, + "learning_rate": 8.962032999014144e-06, + "loss": 1.4122, + "step": 4870 + }, + { + "epoch": 1.45, + "learning_rate": 8.87292367403937e-06, + "loss": 1.3839, + "step": 4880 + }, + { + "epoch": 1.45, + "learning_rate": 8.784163895159428e-06, + "loss": 1.3932, + "step": 4890 + }, + { + "epoch": 1.45, + "learning_rate": 8.695755586196924e-06, + "loss": 1.4012, + "step": 4900 + }, + { + "epoch": 1.45, + "learning_rate": 8.607700663356543e-06, + "loss": 1.3931, + "step": 4910 + }, + { + "epoch": 1.46, + "learning_rate": 8.520001035183503e-06, + "loss": 1.4003, + "step": 4920 + }, + { + "epoch": 1.46, + "learning_rate": 8.432658602522193e-06, + "loss": 1.4064, + "step": 4930 + }, + { + "epoch": 1.46, + "learning_rate": 8.345675258474969e-06, + "loss": 1.383, + "step": 4940 + }, + { + "epoch": 1.47, + "learning_rate": 8.259052888361132e-06, + "loss": 1.4147, + "step": 4950 + }, + { + "epoch": 1.47, + "learning_rate": 8.172793369676052e-06, + "loss": 1.4064, + "step": 4960 + }, + { + "epoch": 1.47, + "learning_rate": 8.086898572050494e-06, + "loss": 1.3894, + "step": 4970 + }, + { + "epoch": 1.48, + "learning_rate": 8.00137035721007e-06, + "loss": 1.3928, + "step": 4980 + }, + { + "epoch": 1.48, + "learning_rate": 7.916210578934896e-06, + "loss": 1.4049, + "step": 4990 + }, + { + "epoch": 1.48, + "learning_rate": 7.831421083019422e-06, + "loss": 1.402, + "step": 5000 + }, + { + "epoch": 1.48, + "learning_rate": 7.747003707232415e-06, + "loss": 1.4144, + "step": 5010 + }, + { + "epoch": 1.49, + "learning_rate": 7.66296028127713e-06, + "loss": 1.3884, + "step": 5020 + }, + { + "epoch": 1.49, + "learning_rate": 7.579292626751647e-06, + "loss": 1.4116, + "step": 5030 + }, + { + "epoch": 1.49, + "learning_rate": 7.4960025571094025e-06, + "loss": 1.3828, + "step": 5040 + }, + { + "epoch": 1.5, + "learning_rate": 7.413091877619868e-06, + "loss": 1.3821, + "step": 5050 + }, + { + "epoch": 1.5, + "learning_rate": 7.330562385329429e-06, + "loss": 1.4068, + "step": 5060 + }, + { + "epoch": 1.5, + "learning_rate": 7.248415869022434e-06, + "loss": 1.3842, + "step": 5070 + }, + { + "epoch": 1.51, + "learning_rate": 7.16665410918243e-06, + "loss": 1.3937, + "step": 5080 + }, + { + "epoch": 1.51, + "learning_rate": 7.085278877953558e-06, + "loss": 1.4077, + "step": 5090 + }, + { + "epoch": 1.51, + "learning_rate": 7.004291939102148e-06, + "loss": 1.3989, + "step": 5100 + }, + { + "epoch": 1.51, + "learning_rate": 6.923695047978502e-06, + "loss": 1.3727, + "step": 5110 + }, + { + "epoch": 1.52, + "learning_rate": 6.843489951478829e-06, + "loss": 1.3842, + "step": 5120 + }, + { + "epoch": 1.52, + "learning_rate": 6.763678388007394e-06, + "loss": 1.3662, + "step": 5130 + }, + { + "epoch": 1.52, + "learning_rate": 6.684262087438839e-06, + "loss": 1.4092, + "step": 5140 + }, + { + "epoch": 1.53, + "learning_rate": 6.605242771080686e-06, + "loss": 1.399, + "step": 5150 + }, + { + "epoch": 1.53, + "learning_rate": 6.526622151636011e-06, + "loss": 1.3931, + "step": 5160 + }, + { + "epoch": 1.53, + "learning_rate": 6.448401933166351e-06, + "loss": 1.3824, + "step": 5170 + }, + { + "epoch": 1.53, + "learning_rate": 6.370583811054778e-06, + "loss": 1.3764, + "step": 5180 + }, + { + "epoch": 1.54, + "learning_rate": 6.293169471969104e-06, + "loss": 1.3835, + "step": 5190 + }, + { + "epoch": 1.54, + "learning_rate": 6.216160593825363e-06, + "loss": 1.382, + "step": 5200 + }, + { + "epoch": 1.54, + "learning_rate": 6.1395588457514226e-06, + "loss": 1.3983, + "step": 5210 + }, + { + "epoch": 1.55, + "learning_rate": 6.063365888050829e-06, + "loss": 1.3709, + "step": 5220 + }, + { + "epoch": 1.55, + "learning_rate": 5.987583372166794e-06, + "loss": 1.4037, + "step": 5230 + }, + { + "epoch": 1.55, + "learning_rate": 5.912212940646422e-06, + "loss": 1.3955, + "step": 5240 + }, + { + "epoch": 1.56, + "learning_rate": 5.8372562271051e-06, + "loss": 1.384, + "step": 5250 + }, + { + "epoch": 1.56, + "learning_rate": 5.762714856191087e-06, + "loss": 1.3772, + "step": 5260 + }, + { + "epoch": 1.56, + "learning_rate": 5.688590443550304e-06, + "loss": 1.3818, + "step": 5270 + }, + { + "epoch": 1.56, + "learning_rate": 5.61488459579132e-06, + "loss": 1.4039, + "step": 5280 + }, + { + "epoch": 1.57, + "learning_rate": 5.541598910450518e-06, + "loss": 1.3935, + "step": 5290 + }, + { + "epoch": 1.57, + "learning_rate": 5.4687349759574845e-06, + "loss": 1.402, + "step": 5300 + }, + { + "epoch": 1.57, + "learning_rate": 5.396294371600569e-06, + "loss": 1.3774, + "step": 5310 + }, + { + "epoch": 1.58, + "learning_rate": 5.3242786674926545e-06, + "loss": 1.3936, + "step": 5320 + }, + { + "epoch": 1.58, + "learning_rate": 5.252689424537139e-06, + "loss": 1.3914, + "step": 5330 + }, + { + "epoch": 1.58, + "learning_rate": 5.181528194394081e-06, + "loss": 1.3931, + "step": 5340 + }, + { + "epoch": 1.59, + "learning_rate": 5.11079651944659e-06, + "loss": 1.3854, + "step": 5350 + }, + { + "epoch": 1.59, + "learning_rate": 5.040495932767386e-06, + "loss": 1.4101, + "step": 5360 + }, + { + "epoch": 1.59, + "learning_rate": 4.970627958085574e-06, + "loss": 1.3929, + "step": 5370 + }, + { + "epoch": 1.59, + "learning_rate": 4.901194109753607e-06, + "loss": 1.3826, + "step": 5380 + }, + { + "epoch": 1.6, + "learning_rate": 4.832195892714489e-06, + "loss": 1.3974, + "step": 5390 + }, + { + "epoch": 1.6, + "learning_rate": 4.763634802469124e-06, + "loss": 1.3988, + "step": 5400 + }, + { + "epoch": 1.6, + "learning_rate": 4.6955123250439245e-06, + "loss": 1.3875, + "step": 5410 + }, + { + "epoch": 1.61, + "learning_rate": 4.6278299369585916e-06, + "loss": 1.3881, + "step": 5420 + }, + { + "epoch": 1.61, + "learning_rate": 4.560589105194121e-06, + "loss": 1.3924, + "step": 5430 + }, + { + "epoch": 1.61, + "learning_rate": 4.493791287160998e-06, + "loss": 1.4058, + "step": 5440 + }, + { + "epoch": 1.61, + "learning_rate": 4.4274379306676164e-06, + "loss": 1.3946, + "step": 5450 + }, + { + "epoch": 1.62, + "learning_rate": 4.361530473888889e-06, + "loss": 1.4045, + "step": 5460 + }, + { + "epoch": 1.62, + "learning_rate": 4.296070345335085e-06, + "loss": 1.3817, + "step": 5470 + }, + { + "epoch": 1.62, + "learning_rate": 4.231058963820867e-06, + "loss": 1.3989, + "step": 5480 + }, + { + "epoch": 1.63, + "learning_rate": 4.166497738434527e-06, + "loss": 1.4004, + "step": 5490 + }, + { + "epoch": 1.63, + "learning_rate": 4.102388068507465e-06, + "loss": 1.3905, + "step": 5500 + }, + { + "epoch": 1.63, + "learning_rate": 4.03873134358384e-06, + "loss": 1.3731, + "step": 5510 + }, + { + "epoch": 1.64, + "learning_rate": 3.9755289433904694e-06, + "loss": 1.383, + "step": 5520 + }, + { + "epoch": 1.64, + "learning_rate": 3.912782237806903e-06, + "loss": 1.3853, + "step": 5530 + }, + { + "epoch": 1.64, + "learning_rate": 3.850492586835755e-06, + "loss": 1.4039, + "step": 5540 + }, + { + "epoch": 1.64, + "learning_rate": 3.788661340573213e-06, + "loss": 1.3703, + "step": 5550 + }, + { + "epoch": 1.65, + "learning_rate": 3.7272898391797734e-06, + "loss": 1.388, + "step": 5560 + }, + { + "epoch": 1.65, + "learning_rate": 3.6663794128512038e-06, + "loss": 1.3785, + "step": 5570 + }, + { + "epoch": 1.65, + "learning_rate": 3.6059313817897065e-06, + "loss": 1.3901, + "step": 5580 + }, + { + "epoch": 1.66, + "learning_rate": 3.5459470561753e-06, + "loss": 1.3894, + "step": 5590 + }, + { + "epoch": 1.66, + "learning_rate": 3.4864277361374264e-06, + "loss": 1.3799, + "step": 5600 + }, + { + "epoch": 1.66, + "learning_rate": 3.4273747117267774e-06, + "loss": 1.3798, + "step": 5610 + }, + { + "epoch": 1.67, + "learning_rate": 3.3687892628873175e-06, + "loss": 1.3948, + "step": 5620 + }, + { + "epoch": 1.67, + "learning_rate": 3.310672659428557e-06, + "loss": 1.4071, + "step": 5630 + }, + { + "epoch": 1.67, + "learning_rate": 3.2530261609980183e-06, + "loss": 1.3993, + "step": 5640 + }, + { + "epoch": 1.67, + "learning_rate": 3.195851017053944e-06, + "loss": 1.3885, + "step": 5650 + }, + { + "epoch": 1.68, + "learning_rate": 3.1391484668382073e-06, + "loss": 1.3919, + "step": 5660 + }, + { + "epoch": 1.68, + "learning_rate": 3.0829197393494548e-06, + "loss": 1.3965, + "step": 5670 + }, + { + "epoch": 1.68, + "learning_rate": 3.0271660533164714e-06, + "loss": 1.4135, + "step": 5680 + }, + { + "epoch": 1.69, + "learning_rate": 2.9718886171717613e-06, + "loss": 1.3923, + "step": 5690 + }, + { + "epoch": 1.69, + "learning_rate": 2.9170886290253552e-06, + "loss": 1.3663, + "step": 5700 + }, + { + "epoch": 1.69, + "learning_rate": 2.8627672766388448e-06, + "loss": 1.3772, + "step": 5710 + }, + { + "epoch": 1.69, + "learning_rate": 2.8089257373996424e-06, + "loss": 1.373, + "step": 5720 + }, + { + "epoch": 1.7, + "learning_rate": 2.755565178295447e-06, + "loss": 1.3858, + "step": 5730 + }, + { + "epoch": 1.7, + "learning_rate": 2.7026867558889694e-06, + "loss": 1.3996, + "step": 5740 + }, + { + "epoch": 1.7, + "learning_rate": 2.6502916162928463e-06, + "loss": 1.4194, + "step": 5750 + }, + { + "epoch": 1.71, + "learning_rate": 2.5983808951448196e-06, + "loss": 1.3895, + "step": 5760 + }, + { + "epoch": 1.71, + "learning_rate": 2.5469557175830993e-06, + "loss": 1.376, + "step": 5770 + }, + { + "epoch": 1.71, + "learning_rate": 2.496017198221995e-06, + "loss": 1.3901, + "step": 5780 + }, + { + "epoch": 1.72, + "learning_rate": 2.445566441127742e-06, + "loss": 1.3782, + "step": 5790 + }, + { + "epoch": 1.72, + "learning_rate": 2.3956045397945826e-06, + "loss": 1.3723, + "step": 5800 + }, + { + "epoch": 1.72, + "learning_rate": 2.3461325771210683e-06, + "loss": 1.3705, + "step": 5810 + }, + { + "epoch": 1.72, + "learning_rate": 2.297151625386576e-06, + "loss": 1.4018, + "step": 5820 + }, + { + "epoch": 1.73, + "learning_rate": 2.2486627462280724e-06, + "loss": 1.3811, + "step": 5830 + }, + { + "epoch": 1.73, + "learning_rate": 2.200666990617098e-06, + "loss": 1.3894, + "step": 5840 + }, + { + "epoch": 1.73, + "learning_rate": 2.153165398837009e-06, + "loss": 1.3931, + "step": 5850 + }, + { + "epoch": 1.74, + "learning_rate": 2.1061590004603978e-06, + "loss": 1.3775, + "step": 5860 + }, + { + "epoch": 1.74, + "learning_rate": 2.059648814326806e-06, + "loss": 1.3897, + "step": 5870 + }, + { + "epoch": 1.74, + "learning_rate": 2.013635848520626e-06, + "loss": 1.3919, + "step": 5880 + }, + { + "epoch": 1.75, + "learning_rate": 1.9681211003492543e-06, + "loss": 1.4191, + "step": 5890 + }, + { + "epoch": 1.75, + "learning_rate": 1.923105556321475e-06, + "loss": 1.4027, + "step": 5900 + }, + { + "epoch": 1.75, + "learning_rate": 1.8785901921260784e-06, + "loss": 1.3767, + "step": 5910 + }, + { + "epoch": 1.75, + "learning_rate": 1.8345759726107193e-06, + "loss": 1.3801, + "step": 5920 + }, + { + "epoch": 1.76, + "learning_rate": 1.7910638517609962e-06, + "loss": 1.3881, + "step": 5930 + }, + { + "epoch": 1.76, + "learning_rate": 1.748054772679772e-06, + "loss": 1.3915, + "step": 5940 + }, + { + "epoch": 1.76, + "learning_rate": 1.705549667566747e-06, + "loss": 1.403, + "step": 5950 + }, + { + "epoch": 1.77, + "learning_rate": 1.6635494576982353e-06, + "loss": 1.3882, + "step": 5960 + }, + { + "epoch": 1.77, + "learning_rate": 1.6220550534072094e-06, + "loss": 1.3902, + "step": 5970 + }, + { + "epoch": 1.77, + "learning_rate": 1.5810673540635702e-06, + "loss": 1.3928, + "step": 5980 + }, + { + "epoch": 1.77, + "learning_rate": 1.540587248054645e-06, + "loss": 1.3819, + "step": 5990 + }, + { + "epoch": 1.78, + "learning_rate": 1.5006156127659348e-06, + "loss": 1.3948, + "step": 6000 + }, + { + "epoch": 1.78, + "learning_rate": 1.4611533145621026e-06, + "loss": 1.3914, + "step": 6010 + }, + { + "epoch": 1.78, + "learning_rate": 1.422201208768187e-06, + "loss": 1.3713, + "step": 6020 + }, + { + "epoch": 1.79, + "learning_rate": 1.3837601396510745e-06, + "loss": 1.3922, + "step": 6030 + }, + { + "epoch": 1.79, + "learning_rate": 1.345830940401191e-06, + "loss": 1.3775, + "step": 6040 + }, + { + "epoch": 1.79, + "learning_rate": 1.3084144331144439e-06, + "loss": 1.3974, + "step": 6050 + }, + { + "epoch": 1.8, + "learning_rate": 1.2715114287744079e-06, + "loss": 1.3859, + "step": 6060 + }, + { + "epoch": 1.8, + "learning_rate": 1.2351227272347444e-06, + "loss": 1.3795, + "step": 6070 + }, + { + "epoch": 1.8, + "learning_rate": 1.19924911720187e-06, + "loss": 1.376, + "step": 6080 + }, + { + "epoch": 1.8, + "learning_rate": 1.1638913762178489e-06, + "loss": 1.3892, + "step": 6090 + }, + { + "epoch": 1.81, + "learning_rate": 1.1290502706435584e-06, + "loss": 1.3866, + "step": 6100 + }, + { + "epoch": 1.81, + "learning_rate": 1.0947265556420588e-06, + "loss": 1.3911, + "step": 6110 + }, + { + "epoch": 1.81, + "learning_rate": 1.060920975162244e-06, + "loss": 1.3535, + "step": 6120 + }, + { + "epoch": 1.82, + "learning_rate": 1.0276342619227024e-06, + "loss": 1.3815, + "step": 6130 + }, + { + "epoch": 1.82, + "learning_rate": 9.948671373958468e-07, + "loss": 1.3989, + "step": 6140 + }, + { + "epoch": 1.82, + "learning_rate": 9.626203117922672e-07, + "loss": 1.3974, + "step": 6150 + }, + { + "epoch": 1.83, + "learning_rate": 9.308944840453415e-07, + "loss": 1.3717, + "step": 6160 + }, + { + "epoch": 1.83, + "learning_rate": 8.996903417960917e-07, + "loss": 1.3797, + "step": 6170 + }, + { + "epoch": 1.83, + "learning_rate": 8.690085613782706e-07, + "loss": 1.3846, + "step": 6180 + }, + { + "epoch": 1.83, + "learning_rate": 8.388498078037044e-07, + "loss": 1.379, + "step": 6190 + }, + { + "epoch": 1.84, + "learning_rate": 8.092147347478873e-07, + "loss": 1.408, + "step": 6200 + }, + { + "epoch": 1.84, + "learning_rate": 7.801039845358044e-07, + "loss": 1.3776, + "step": 6210 + }, + { + "epoch": 1.84, + "learning_rate": 7.515181881280115e-07, + "loss": 1.397, + "step": 6220 + }, + { + "epoch": 1.85, + "learning_rate": 7.234579651069578e-07, + "loss": 1.3768, + "step": 6230 + }, + { + "epoch": 1.85, + "learning_rate": 6.959239236635662e-07, + "loss": 1.3807, + "step": 6240 + }, + { + "epoch": 1.85, + "learning_rate": 6.68916660584043e-07, + "loss": 1.3947, + "step": 6250 + }, + { + "epoch": 1.85, + "learning_rate": 6.424367612369364e-07, + "loss": 1.3785, + "step": 6260 + }, + { + "epoch": 1.86, + "learning_rate": 6.164847995604656e-07, + "loss": 1.3828, + "step": 6270 + }, + { + "epoch": 1.86, + "learning_rate": 5.910613380500696e-07, + "loss": 1.3765, + "step": 6280 + }, + { + "epoch": 1.86, + "learning_rate": 5.66166927746209e-07, + "loss": 1.3994, + "step": 6290 + }, + { + "epoch": 1.87, + "learning_rate": 5.418021082224472e-07, + "loss": 1.4081, + "step": 6300 + }, + { + "epoch": 1.87, + "learning_rate": 5.179674075737273e-07, + "loss": 1.3885, + "step": 6310 + }, + { + "epoch": 1.87, + "learning_rate": 4.946633424049413e-07, + "loss": 1.3748, + "step": 6320 + }, + { + "epoch": 1.88, + "learning_rate": 4.7189041781973144e-07, + "loss": 1.3988, + "step": 6330 + }, + { + "epoch": 1.88, + "learning_rate": 4.4964912740954045e-07, + "loss": 1.3815, + "step": 6340 + }, + { + "epoch": 1.88, + "learning_rate": 4.2793995324290903e-07, + "loss": 1.3736, + "step": 6350 + }, + { + "epoch": 1.88, + "learning_rate": 4.067633658550396e-07, + "loss": 1.3847, + "step": 6360 + }, + { + "epoch": 1.89, + "learning_rate": 3.861198242375852e-07, + "loss": 1.3907, + "step": 6370 + }, + { + "epoch": 1.89, + "learning_rate": 3.660097758287018e-07, + "loss": 1.3914, + "step": 6380 + }, + { + "epoch": 1.89, + "learning_rate": 3.464336565033588e-07, + "loss": 1.3861, + "step": 6390 + }, + { + "epoch": 1.9, + "learning_rate": 3.273918905638912e-07, + "loss": 1.3937, + "step": 6400 + }, + { + "epoch": 1.9, + "learning_rate": 3.088848907307873e-07, + "loss": 1.3783, + "step": 6410 + }, + { + "epoch": 1.9, + "learning_rate": 2.909130581337655e-07, + "loss": 1.4057, + "step": 6420 + }, + { + "epoch": 1.91, + "learning_rate": 2.7347678230306427e-07, + "loss": 1.3672, + "step": 6430 + }, + { + "epoch": 1.91, + "learning_rate": 2.5657644116100497e-07, + "loss": 1.3742, + "step": 6440 + }, + { + "epoch": 1.91, + "learning_rate": 2.4021240101379793e-07, + "loss": 1.3964, + "step": 6450 + }, + { + "epoch": 1.91, + "learning_rate": 2.243850165436129e-07, + "loss": 1.4025, + "step": 6460 + }, + { + "epoch": 1.92, + "learning_rate": 2.0909463080087123e-07, + "loss": 1.386, + "step": 6470 + }, + { + "epoch": 1.92, + "learning_rate": 1.9434157519683248e-07, + "loss": 1.3875, + "step": 6480 + }, + { + "epoch": 1.92, + "learning_rate": 1.8012616949640015e-07, + "loss": 1.3945, + "step": 6490 + }, + { + "epoch": 1.93, + "learning_rate": 1.6644872181118543e-07, + "loss": 1.3725, + "step": 6500 + }, + { + "epoch": 1.93, + "learning_rate": 1.533095285928432e-07, + "loss": 1.3732, + "step": 6510 + }, + { + "epoch": 1.93, + "learning_rate": 1.4070887462663552e-07, + "loss": 1.3791, + "step": 6520 + }, + { + "epoch": 1.93, + "learning_rate": 1.2864703302526703e-07, + "loss": 1.3922, + "step": 6530 + }, + { + "epoch": 1.94, + "learning_rate": 1.171242652229565e-07, + "loss": 1.3737, + "step": 6540 + }, + { + "epoch": 1.94, + "learning_rate": 1.0614082096977729e-07, + "loss": 1.3879, + "step": 6550 + }, + { + "epoch": 1.94, + "learning_rate": 9.569693832623961e-08, + "loss": 1.3962, + "step": 6560 + }, + { + "epoch": 1.95, + "learning_rate": 8.579284365813623e-08, + "loss": 1.4029, + "step": 6570 + }, + { + "epoch": 1.95, + "learning_rate": 7.642875163162977e-08, + "loss": 1.3903, + "step": 6580 + }, + { + "epoch": 1.95, + "learning_rate": 6.760486520860643e-08, + "loss": 1.3986, + "step": 6590 + }, + { + "epoch": 1.96, + "learning_rate": 5.932137564227391e-08, + "loss": 1.3624, + "step": 6600 + }, + { + "epoch": 1.96, + "learning_rate": 5.157846247300646e-08, + "loss": 1.3732, + "step": 6610 + }, + { + "epoch": 1.96, + "learning_rate": 4.43762935244757e-08, + "loss": 1.3838, + "step": 6620 + }, + { + "epoch": 1.96, + "learning_rate": 3.7715024899989684e-08, + "loss": 1.3711, + "step": 6630 + }, + { + "epoch": 1.97, + "learning_rate": 3.159480097912615e-08, + "loss": 1.3974, + "step": 6640 + }, + { + "epoch": 1.97, + "learning_rate": 2.6015754414593363e-08, + "loss": 1.3918, + "step": 6650 + }, + { + "epoch": 1.97, + "learning_rate": 2.097800612936296e-08, + "loss": 1.3933, + "step": 6660 + }, + { + "epoch": 1.98, + "learning_rate": 1.6481665314035944e-08, + "loss": 1.3815, + "step": 6670 + }, + { + "epoch": 1.98, + "learning_rate": 1.2526829424494569e-08, + "loss": 1.3671, + "step": 6680 + }, + { + "epoch": 1.98, + "learning_rate": 9.113584179770707e-09, + "loss": 1.4008, + "step": 6690 + }, + { + "epoch": 1.99, + "learning_rate": 6.242003560205656e-09, + "loss": 1.3876, + "step": 6700 + }, + { + "epoch": 1.99, + "learning_rate": 3.9121498058292124e-09, + "loss": 1.3722, + "step": 6710 + }, + { + "epoch": 1.99, + "learning_rate": 2.124073415030181e-09, + "loss": 1.3823, + "step": 6720 + }, + { + "epoch": 1.99, + "learning_rate": 8.778131434433734e-10, + "loss": 1.3815, + "step": 6730 + }, + { + "epoch": 2.0, + "learning_rate": 1.7339600311971903e-10, + "loss": 1.3889, + "step": 6740 + }, + { + "epoch": 2.0, + "step": 6748, + "total_flos": 1.1679618660044898e+19, + "train_loss": 1.4147593358881598, + "train_runtime": 86303.1376, + "train_samples_per_second": 7.507, + "train_steps_per_second": 0.078 + } + ], + "max_steps": 6748, + "num_train_epochs": 2, + "total_flos": 1.1679618660044898e+19, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..406ab5b628f223bfcd63d70185fb1bc0973e19c4 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:77681af64e1f04ae2b28b063de632629c209cd2338ce2449c3e014f309b6088a +size 3298 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..433a1d097d25ccd46310cbb646f9e9a7c47c68c5 Binary files /dev/null and b/training_loss.png differ