diff --git a/README.md b/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/adapter_config.json b/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/adapter_model.bin b/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..13caf956e8aa7d0a4bcfc75335043415d6167487 --- /dev/null +++ b/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ea7b0fb91f08aca8b4b7ab4fc593a72c7ac8f936a8b1b0f74170dc31085ccd8 +size 16821197 diff --git a/all_results.json b/all_results.json new file mode 100644 index 0000000000000000000000000000000000000000..719e3a99ae710467091fa106314e8877f34c3e8d --- /dev/null +++ b/all_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 2.0, + "train_loss": 0.7399377094979515, + "train_runtime": 176487.3633, + "train_samples_per_second": 2.562, + "train_steps_per_second": 0.16 +} \ No newline at end of file diff --git a/checkpoint-1000/README.md b/checkpoint-1000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-1000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-1000/adapter_config.json b/checkpoint-1000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-1000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-1000/adapter_model.bin b/checkpoint-1000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c4a5f635ace6ea043b9713588b10be17d73243c1 --- /dev/null +++ b/checkpoint-1000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6a9a2d59099a538613c48a0b00ee727732ab2529a774194cb4fc25234c00c876 +size 16821197 diff --git a/checkpoint-1000/finetuning_args.json b/checkpoint-1000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-1000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-1000/optimizer.pt b/checkpoint-1000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6990e42f4202c3ab558a77e5a98284b51ac99015 --- /dev/null +++ b/checkpoint-1000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fa7f1c6570f9391d5d5017e923d9263b86cd14f345e7de3dcc6457dde24209e5 +size 33661637 diff --git a/checkpoint-1000/rng_state.pth b/checkpoint-1000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bad7d82aa9036d4e01e97772fee3e11bca57f08d --- /dev/null +++ b/checkpoint-1000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bd0a027fab7d1d27eeff972e975255506e62b2efd84dd6b01dc103696d8f4e46 +size 18663 diff --git a/checkpoint-1000/scheduler.pt b/checkpoint-1000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..dac43132c294bf3af2badee4466b8fcf9fd57a94 --- /dev/null +++ b/checkpoint-1000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f4e98d8a4e42439109d00bd301cb15fbea0119270f18a90ec80ba6f284c47478 +size 627 diff --git a/checkpoint-1000/trainer_state.json b/checkpoint-1000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..a2b1b72fab74d7eb8fcb12c44d44bca6bd5ab4bb --- /dev/null +++ b/checkpoint-1000/trainer_state.json @@ -0,0 +1,616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.07078267947833165, + "global_step": 1000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 2.4351950380204032e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-1000/training_args.bin b/checkpoint-1000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-1000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-10000/README.md b/checkpoint-10000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-10000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-10000/adapter_config.json b/checkpoint-10000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-10000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-10000/adapter_model.bin b/checkpoint-10000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..2c940c06dc2905fb2556f2e48f09cc7b4d519a92 --- /dev/null +++ b/checkpoint-10000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3f714c57ae0e84c325da10732bbcdc8c43ef4e2ea454b9bfe7595bef647318c +size 16821197 diff --git a/checkpoint-10000/finetuning_args.json b/checkpoint-10000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-10000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-10000/optimizer.pt b/checkpoint-10000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..720ba361c774154076fa8fcb4654f46755702a59 --- /dev/null +++ b/checkpoint-10000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:42123fa83d920bb3a255a7a687a2eb1ea9f17e566bb97bb150b469a78b965ce1 +size 33661637 diff --git a/checkpoint-10000/rng_state.pth b/checkpoint-10000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..00879a270c34b0ed8fef4ad13c7bff98aef055d1 --- /dev/null +++ b/checkpoint-10000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a05d6225789398771b5189036c35cf28db936606ca14f38f8ecd1039e1dcde3a +size 18663 diff --git a/checkpoint-10000/scheduler.pt b/checkpoint-10000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..e15a4c63997ec082a30d7dc48022262ec3e65ac7 --- /dev/null +++ b/checkpoint-10000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:93cc76c5d7ae5ed46080b681d1f5208fe8bc5915697b321ef9f75faa596c26c9 +size 627 diff --git a/checkpoint-10000/trainer_state.json b/checkpoint-10000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..761115a8b059adb314c45f5a83441b013f4ab291 --- /dev/null +++ b/checkpoint-10000/trainer_state.json @@ -0,0 +1,6016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7078267947833166, + "global_step": 10000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 2.4338932916834796e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-10000/training_args.bin b/checkpoint-10000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-10000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-11000/README.md b/checkpoint-11000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-11000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-11000/adapter_config.json b/checkpoint-11000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-11000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-11000/adapter_model.bin b/checkpoint-11000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..f4e1734fecf52eb6981aec6974a59c817331c00c --- /dev/null +++ b/checkpoint-11000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a26437afd89e8830252fa31d98f1f93d178e45eb47dec9daab089eda6cc14cb7 +size 16821197 diff --git a/checkpoint-11000/finetuning_args.json b/checkpoint-11000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-11000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-11000/optimizer.pt b/checkpoint-11000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..337e05c38cd1ddf3a9c859c5d11da6bdab16c646 --- /dev/null +++ b/checkpoint-11000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3dfdc5a7b72a82bea6b1e623b952a95acc24b2e6a929041a0240338ae0d4a344 +size 33661637 diff --git a/checkpoint-11000/rng_state.pth b/checkpoint-11000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..bab20504a15b3276e6704a60d395116d5c89f59b --- /dev/null +++ b/checkpoint-11000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6109f33969d76a77ca5a4ec7325a2b0f5c2f0162418caf5905fe6064ad3e6074 +size 18663 diff --git a/checkpoint-11000/scheduler.pt b/checkpoint-11000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..45fa892583caeba25a828dbbecd21f308290f684 --- /dev/null +++ b/checkpoint-11000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0eb3b6455e8be51eec964213c3e96c486647705885fc26f5616552411cb5375e +size 627 diff --git a/checkpoint-11000/trainer_state.json b/checkpoint-11000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..e231542bfcd9c2635d1ca8e12a3166f6d50f2e63 --- /dev/null +++ b/checkpoint-11000/trainer_state.json @@ -0,0 +1,6616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.7786094742616482, + "global_step": 11000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 2.677334746266075e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-11000/training_args.bin b/checkpoint-11000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-11000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-12000/README.md b/checkpoint-12000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-12000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-12000/adapter_config.json b/checkpoint-12000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-12000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-12000/adapter_model.bin b/checkpoint-12000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..43f7052ddadb9fa46391c5b0f701284ea52252e7 --- /dev/null +++ b/checkpoint-12000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:15cdefa05b6a05bcd8506fe3040c564100608491ad329a6d0457fb705598efd1 +size 16821197 diff --git a/checkpoint-12000/finetuning_args.json b/checkpoint-12000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-12000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-12000/optimizer.pt b/checkpoint-12000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..095ea2f84967661707fbfdaafceef93fd0f63062 --- /dev/null +++ b/checkpoint-12000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e54b9abfd43b35a4193c6e08f54701755cf5f80021e890fe8c9b4062c584767 +size 33661637 diff --git a/checkpoint-12000/rng_state.pth b/checkpoint-12000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..904414d327d7723ebd698eb333d391f9aef9b27e --- /dev/null +++ b/checkpoint-12000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:497fc5f4f84ce21c7b29d94b52ec8313d6cb1ed9a8c4299da58f29e636f1e066 +size 18663 diff --git a/checkpoint-12000/scheduler.pt b/checkpoint-12000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ac25c9efaf3ae897fe283fefb2c4d267ad34812c --- /dev/null +++ b/checkpoint-12000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a7ef392eb88915ec01d1b6e6bc31e16cd878df9a85102b1a2433fb0bdd4d5ee0 +size 627 diff --git a/checkpoint-12000/trainer_state.json b/checkpoint-12000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..02e872fe1755c2404917496ecd63e994e99a6f61 --- /dev/null +++ b/checkpoint-12000/trainer_state.json @@ -0,0 +1,7216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.8493921537399798, + "global_step": 12000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 2.9208226729316844e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-12000/training_args.bin b/checkpoint-12000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-12000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-13000/README.md b/checkpoint-13000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-13000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-13000/adapter_config.json b/checkpoint-13000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-13000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-13000/adapter_model.bin b/checkpoint-13000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..02efb479cf386f4b1e7dca40b2c80db4d7ec6787 --- /dev/null +++ b/checkpoint-13000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:38b36ab9ab0afc9ef5746482ede51e116a4ed0f4afd488519d80af3ff546f112 +size 16821197 diff --git a/checkpoint-13000/finetuning_args.json b/checkpoint-13000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-13000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-13000/optimizer.pt b/checkpoint-13000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..537a1fa097570f491a0009c59ba783ce8de6c48b --- /dev/null +++ b/checkpoint-13000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e4039ba5883d9c3a3811e634bdbf861fa23147fc2d3373da1603eef651eff800 +size 33661637 diff --git a/checkpoint-13000/rng_state.pth b/checkpoint-13000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3eae5b2f929aa77a6a835eb6be307afc776bfa3c --- /dev/null +++ b/checkpoint-13000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2bd8d759d0ef06b2ecb227efae8b257b736c638ac6508a3810732b04842cf81f +size 18663 diff --git a/checkpoint-13000/scheduler.pt b/checkpoint-13000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..29ae16b6278e19b1f1c53ca4b1866b267be05e7c --- /dev/null +++ b/checkpoint-13000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ac65d947299795de87194e2e527c46bcce31fcd02f89111f6e6ba1050b5bf93b +size 627 diff --git a/checkpoint-13000/trainer_state.json b/checkpoint-13000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c2a5a1ed16a799f632baec9298c70251b5ac464f --- /dev/null +++ b/checkpoint-13000/trainer_state.json @@ -0,0 +1,7816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9201748332183115, + "global_step": 13000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 3.164883750098043e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-13000/training_args.bin b/checkpoint-13000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-13000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-14000/README.md b/checkpoint-14000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-14000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-14000/adapter_config.json b/checkpoint-14000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-14000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-14000/adapter_model.bin b/checkpoint-14000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..c3c11ab044649ac0da7c0fff90b3776f0240d418 --- /dev/null +++ b/checkpoint-14000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4c0a4f31bc50579c56c540a4707c55129fe1a1520852c04d1f560a5d832734c5 +size 16821197 diff --git a/checkpoint-14000/finetuning_args.json b/checkpoint-14000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-14000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-14000/optimizer.pt b/checkpoint-14000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..5eefdeace40a7bb3dcb6ce9cc24792621caff8b6 --- /dev/null +++ b/checkpoint-14000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:55e18f7bf8b9a1f5f42d870287f98fb691e030375733662fbaa05ac2b8600d64 +size 33661637 diff --git a/checkpoint-14000/rng_state.pth b/checkpoint-14000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cf8bad1d0a596d64a98a40d170559285ffe106f6 --- /dev/null +++ b/checkpoint-14000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d3f4b95f8d5a27294447ce27e53452e51631b68d4609421658957aa1a5fb2c0c +size 18663 diff --git a/checkpoint-14000/scheduler.pt b/checkpoint-14000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..49183631961f201fbc6476a519e795674f154286 --- /dev/null +++ b/checkpoint-14000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:447acf0c5b5ff1dbbd3c36943fba647b26c2fbb0eb449b8a2eeb605a0e416752 +size 627 diff --git a/checkpoint-14000/trainer_state.json b/checkpoint-14000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ef1983c77ec37e5a99daa8d2458b54a4ec096732 --- /dev/null +++ b/checkpoint-14000/trainer_state.json @@ -0,0 +1,8416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9909575126966431, + "global_step": 14000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 3.407532154270253e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-14000/training_args.bin b/checkpoint-14000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-14000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-15000/README.md b/checkpoint-15000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-15000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-15000/adapter_config.json b/checkpoint-15000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-15000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-15000/adapter_model.bin b/checkpoint-15000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..5af5624397de11a914360703e15460a55b0b0e8b --- /dev/null +++ b/checkpoint-15000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f55a91cdcae4bfb15d572ad57e3df22bcdbf61cfa226afb5fe737ded8023231 +size 16821197 diff --git a/checkpoint-15000/finetuning_args.json b/checkpoint-15000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-15000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-15000/optimizer.pt b/checkpoint-15000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7e33a49cc362b24dfacfb1627c98871c79920a87 --- /dev/null +++ b/checkpoint-15000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4bdf72f12ad50751cb747e7833e440ab2e311515b18177652a935b13267e34e6 +size 33661637 diff --git a/checkpoint-15000/rng_state.pth b/checkpoint-15000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..f7766aea14ad6a67c2b5e52476be7fc6edf9e3ab --- /dev/null +++ b/checkpoint-15000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96765908e83e41154d76acd78b91ab9d408bd03feb3acc81c39d2b0ad94276df +size 18663 diff --git a/checkpoint-15000/scheduler.pt b/checkpoint-15000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..c6c251a4abe32eaeaf4f1a9eed0ceb6fd99e6534 --- /dev/null +++ b/checkpoint-15000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:575b3e03f4bc94f34c23a730b6425b5e855f0cd539efdc34cd3347fb82c339bf +size 627 diff --git a/checkpoint-15000/trainer_state.json b/checkpoint-15000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8a55b161627a7a8f2aeafc7a663cc38b5bf5600e --- /dev/null +++ b/checkpoint-15000/trainer_state.json @@ -0,0 +1,9016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0617401921749747, + "global_step": 15000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 3.6513947491867034e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-15000/training_args.bin b/checkpoint-15000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-15000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-16000/README.md b/checkpoint-16000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-16000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-16000/adapter_config.json b/checkpoint-16000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-16000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-16000/adapter_model.bin b/checkpoint-16000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..29bb339aebf7e5bf58158830335629897e8f769f --- /dev/null +++ b/checkpoint-16000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f496e0a60fecf80cfb7783a66688b755b8b04e6b09226e48a01ae6cbf9bba542 +size 16821197 diff --git a/checkpoint-16000/finetuning_args.json b/checkpoint-16000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-16000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-16000/optimizer.pt b/checkpoint-16000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..711830de97c7cddd351d0efee620daae81ecfd8a --- /dev/null +++ b/checkpoint-16000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4555f5466819700333c9343787373be20d995ea0f01c60c4f7635b6e49504461 +size 33661637 diff --git a/checkpoint-16000/rng_state.pth b/checkpoint-16000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..37fe670d9be86947e74985a02ce82ff57dada0be --- /dev/null +++ b/checkpoint-16000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59a013ee51dc7395285c3eaddbd00c4044d85d6572193f4144fbf726fca6913d +size 18663 diff --git a/checkpoint-16000/scheduler.pt b/checkpoint-16000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..740177660033b6aa0e7f5a7e214fe23b41505bc8 --- /dev/null +++ b/checkpoint-16000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0b972efeba2b19939491cfd5e1faf046dc5bb08b5a6abda5346ae9a5a8041944 +size 627 diff --git a/checkpoint-16000/trainer_state.json b/checkpoint-16000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..7d10f127ece9360a0e6a3034a05b87c6c9055190 --- /dev/null +++ b/checkpoint-16000/trainer_state.json @@ -0,0 +1,9616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.1325228716533065, + "global_step": 16000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 3.8959455025538007e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-16000/training_args.bin b/checkpoint-16000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-16000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-17000/README.md b/checkpoint-17000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-17000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-17000/adapter_config.json b/checkpoint-17000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-17000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-17000/adapter_model.bin b/checkpoint-17000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..4e2ff20aef46fd3d11ed40bbace398841738d84c --- /dev/null +++ b/checkpoint-17000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9504a250a95f6835650f0f41eaf1a1d72e1626e22e6f2fe2544bddc72326c9e1 +size 16821197 diff --git a/checkpoint-17000/finetuning_args.json b/checkpoint-17000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-17000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-17000/optimizer.pt b/checkpoint-17000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..03af4f2401794c07a868d1aa8cf1000077c71d25 --- /dev/null +++ b/checkpoint-17000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f80e788b2f8bb4e5c133e019296673e0d599175069f2aabf8e36896996ffb06 +size 33661637 diff --git a/checkpoint-17000/rng_state.pth b/checkpoint-17000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2195afa7cdab6a4cd6c7e81b36e23522a2e04ac5 --- /dev/null +++ b/checkpoint-17000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0af87ddad8a3da9e83ad533f0626e190b5c3ab883099bf5487c2194ae730b076 +size 18663 diff --git a/checkpoint-17000/scheduler.pt b/checkpoint-17000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..70220a6996c42a384267bfdc1c960b34a7609f6c --- /dev/null +++ b/checkpoint-17000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3bc193d01e9a91df298ca535e83426ed66384825e4e92711a62e1c8ea8c8b064 +size 627 diff --git a/checkpoint-17000/trainer_state.json b/checkpoint-17000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8f68209225a0398f3013ff4b85f458d8183070cf --- /dev/null +++ b/checkpoint-17000/trainer_state.json @@ -0,0 +1,10216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2033055511316382, + "global_step": 17000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 4.139226889342419e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-17000/training_args.bin b/checkpoint-17000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-17000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-18000/README.md b/checkpoint-18000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-18000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-18000/adapter_config.json b/checkpoint-18000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-18000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-18000/adapter_model.bin b/checkpoint-18000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..23117f4be29769a5a7a4bd2c27cc31e065ad6e93 --- /dev/null +++ b/checkpoint-18000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c2c60b6e3ff14c80eaa94f324408768c9e04f7bd8d579f324c6bfdc2441a49e5 +size 16821197 diff --git a/checkpoint-18000/finetuning_args.json b/checkpoint-18000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-18000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-18000/optimizer.pt b/checkpoint-18000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e7db46b8dd5049c8430d10f99e8108d287c7ffce --- /dev/null +++ b/checkpoint-18000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8b88984bdb459e3ddf0b22b7cc056297e206435f8e3771827b0bf665b2ea95b5 +size 33661637 diff --git a/checkpoint-18000/rng_state.pth b/checkpoint-18000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2ae3bbcc80031fef89b1e8a3e2a3ee97142cb1d2 --- /dev/null +++ b/checkpoint-18000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd423b674468f866edfb5d9cd7143dd480fb7fb9d05535b1d947fce7f0beef96 +size 18663 diff --git a/checkpoint-18000/scheduler.pt b/checkpoint-18000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..7343cec976c1fba640c5f34950a27e82c48a9c96 --- /dev/null +++ b/checkpoint-18000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b37014dfe0d94e512ca2e2a30311a5e5a7e45242b078bbf0b99e66c23bb34872 +size 627 diff --git a/checkpoint-18000/trainer_state.json b/checkpoint-18000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..637d135ccae8c8b6fbcb6a2bd286de82ea199b2b --- /dev/null +++ b/checkpoint-18000/trainer_state.json @@ -0,0 +1,10816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.2740882306099697, + "global_step": 18000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 4.38237865187056e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-18000/training_args.bin b/checkpoint-18000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-18000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-19000/README.md b/checkpoint-19000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-19000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-19000/adapter_config.json b/checkpoint-19000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-19000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-19000/adapter_model.bin b/checkpoint-19000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ac3e1cb3b4f659c036dcdd602da046b16820de44 --- /dev/null +++ b/checkpoint-19000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6f97504f27e05efa35e302dccdf982389757b03260b71d260f4842b6d55a991f +size 16821197 diff --git a/checkpoint-19000/finetuning_args.json b/checkpoint-19000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-19000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-19000/optimizer.pt b/checkpoint-19000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b876330f3123390d7c895932123281d45e116f2e --- /dev/null +++ b/checkpoint-19000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f49c24dc20570b5108cd081c8d52078b8ecf043b3a887122395d218254f630f9 +size 33661637 diff --git a/checkpoint-19000/rng_state.pth b/checkpoint-19000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..e152cc3e09e72ba75025e12072c8b527a675faa0 --- /dev/null +++ b/checkpoint-19000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e473bf08b0f5d6b5fa78bc2db613bffaaccc98662d3ae7c26743e1fb22ede194 +size 18663 diff --git a/checkpoint-19000/scheduler.pt b/checkpoint-19000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..ed76275c18dc5a49ca1891fd9f9ced1c4b772272 --- /dev/null +++ b/checkpoint-19000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f01b4bd5490f887ab08b3729a16bb5d7b58307d8917325c27b6ef79e449d318 +size 627 diff --git a/checkpoint-19000/trainer_state.json b/checkpoint-19000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9d96ef6c51ae624a6d7c0ef7d0a5d6507616b70b --- /dev/null +++ b/checkpoint-19000/trainer_state.json @@ -0,0 +1,11416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.3448709100883014, + "global_step": 19000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 4.624817274807648e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-19000/training_args.bin b/checkpoint-19000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-19000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-2000/README.md b/checkpoint-2000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-2000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-2000/adapter_config.json b/checkpoint-2000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-2000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-2000/adapter_model.bin b/checkpoint-2000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..67b1d18f03d85c5c7a545e93a8504b23aabb3695 --- /dev/null +++ b/checkpoint-2000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2a2a17bb9ca5e629b285167d7fc845586c3b763ae6e20e7cb882a15533eb00b8 +size 16821197 diff --git a/checkpoint-2000/finetuning_args.json b/checkpoint-2000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-2000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-2000/optimizer.pt b/checkpoint-2000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0e9c3784caab87292d81bfe899decb162330b22e --- /dev/null +++ b/checkpoint-2000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d10bba765e1dcbe0cf3f0afec77e0abc8551a591d2b607561d76053b8eeee88e +size 33661637 diff --git a/checkpoint-2000/rng_state.pth b/checkpoint-2000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..b58b70aa3c2bf208532f6a204deca54f14107290 --- /dev/null +++ b/checkpoint-2000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ef8bff390866867f41191e9018441c9f1e0a4ef3e4d3403b3005cc86af3fa7e8 +size 18663 diff --git a/checkpoint-2000/scheduler.pt b/checkpoint-2000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b0d0394e80b024722362b79ef6db486d241d78d6 --- /dev/null +++ b/checkpoint-2000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3ffca6e606876403503284f02b53ef00fc80b685aed1cd756b8b15fec6ecc78e +size 627 diff --git a/checkpoint-2000/trainer_state.json b/checkpoint-2000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..cfa601ed9fe688d86967d75cf48e7f880b240a53 --- /dev/null +++ b/checkpoint-2000/trainer_state.json @@ -0,0 +1,1216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.1415653589566633, + "global_step": 2000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 4.86896752918528e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-2000/training_args.bin b/checkpoint-2000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-2000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-20000/README.md b/checkpoint-20000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-20000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-20000/adapter_config.json b/checkpoint-20000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-20000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-20000/adapter_model.bin b/checkpoint-20000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..014b39827e06c4c4274c0c9fd0539168d2f5a681 --- /dev/null +++ b/checkpoint-20000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6065553e1f2445195c9ebd7f50cc464029339bc523a409ef566508e8b3f49868 +size 16821197 diff --git a/checkpoint-20000/finetuning_args.json b/checkpoint-20000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-20000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-20000/optimizer.pt b/checkpoint-20000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f29d8dfc0d92a9d1e058c021ee6aa0aa5092ffdd --- /dev/null +++ b/checkpoint-20000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6ef538ee688865a5c7639e5de15acf8138f53da6e648d0e5cfce7eaa75c808a5 +size 33661637 diff --git a/checkpoint-20000/rng_state.pth b/checkpoint-20000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..df24fb96c88e8121f5b1184f61ba0bb7c747d431 --- /dev/null +++ b/checkpoint-20000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:33ab2ed7739c5b18e1476ab9b3b618511f9492ddd443b5880c556b135d9383d9 +size 18663 diff --git a/checkpoint-20000/scheduler.pt b/checkpoint-20000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..2dee17bcfca50cb58e2e30d8fff05c7cbd771323 --- /dev/null +++ b/checkpoint-20000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f2a1003e80a657f0dbaceb56f7a982ca5abbef7beef61b600c5d154734e64ee6 +size 627 diff --git a/checkpoint-20000/trainer_state.json b/checkpoint-20000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8c87c57baf25722b22c4c83cfd03955c5084355b --- /dev/null +++ b/checkpoint-20000/trainer_state.json @@ -0,0 +1,12016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4156535895666331, + "global_step": 20000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 4.868087245304758e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-20000/training_args.bin b/checkpoint-20000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-20000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-21000/README.md b/checkpoint-21000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-21000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-21000/adapter_config.json b/checkpoint-21000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-21000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-21000/adapter_model.bin b/checkpoint-21000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..3d5d77e032f02da9e5ee112e509ab21cf19b743c --- /dev/null +++ b/checkpoint-21000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c185e5f5182b93b5b9473472c4b929c59c297b6030a68146922f6982932df69b +size 16821197 diff --git a/checkpoint-21000/finetuning_args.json b/checkpoint-21000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-21000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-21000/optimizer.pt b/checkpoint-21000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..6b49ae929caa5218334df7ad3d32ee3b38ee6da8 --- /dev/null +++ b/checkpoint-21000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:335fafead199063d66f3a7dc68de3fc62166cf9ccf43e2e0ee4f5f3b2d3aadc1 +size 33661637 diff --git a/checkpoint-21000/rng_state.pth b/checkpoint-21000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..acd2b0aa1f9c3207a97326d1b73611e9aedcda8a --- /dev/null +++ b/checkpoint-21000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73ed7fb3ae330faddbfd8cabd88a12aa9d2c294771bb65e2777eda4377051cea +size 18663 diff --git a/checkpoint-21000/scheduler.pt b/checkpoint-21000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..1ac2eb3fead354684c9118d3de34db53646c11bb --- /dev/null +++ b/checkpoint-21000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5f327f5bac167ff8be1e2f2efa8b1b85d25afd6f0872259147e4778743c9ee4f +size 627 diff --git a/checkpoint-21000/trainer_state.json b/checkpoint-21000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..166866199041006923c555c6e73f425f81e1abcc --- /dev/null +++ b/checkpoint-21000/trainer_state.json @@ -0,0 +1,12616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.4864362690449646, + "global_step": 21000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 5.111896522497196e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-21000/training_args.bin b/checkpoint-21000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-21000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-22000/README.md b/checkpoint-22000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-22000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-22000/adapter_config.json b/checkpoint-22000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-22000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-22000/adapter_model.bin b/checkpoint-22000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e2587a5dc38c725774e1154955bfa73e7c39f9c --- /dev/null +++ b/checkpoint-22000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b38ab8bc9cbf56e83147ad5ccd80303198799eb7788b0bfffc4eb05551091637 +size 16821197 diff --git a/checkpoint-22000/finetuning_args.json b/checkpoint-22000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-22000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-22000/optimizer.pt b/checkpoint-22000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..1857731e4d045d92cc8113517f4c34addb5f42dd --- /dev/null +++ b/checkpoint-22000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0f80ceb47fc8b937294d27d53d72f652701ea459bd9cc92259150c675a5ebf0f +size 33661637 diff --git a/checkpoint-22000/rng_state.pth b/checkpoint-22000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..2046351bedba232a28b82bb27f6cde95cb5c17d4 --- /dev/null +++ b/checkpoint-22000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4f51d1ee06e3d7d57a53773455bd9bdb699c7c3a1e15638496a364548b427b5f +size 18663 diff --git a/checkpoint-22000/scheduler.pt b/checkpoint-22000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6fc0c6d1a36c3deddee7b7cb506df649dcfa4daa --- /dev/null +++ b/checkpoint-22000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcf62e2d25e5301d25ae703735add8e9694c19f852baf80f219c5e3717fdc8fc +size 627 diff --git a/checkpoint-22000/trainer_state.json b/checkpoint-22000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..d0c6c4f1da1614422c7cfdb2935d0c06c0bcfca4 --- /dev/null +++ b/checkpoint-22000/trainer_state.json @@ -0,0 +1,13216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.5572189485232963, + "global_step": 22000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + }, + { + "epoch": 1.49, + "learning_rate": 7.692689245053728e-06, + "loss": 0.6843, + "step": 21010 + }, + { + "epoch": 1.49, + "learning_rate": 7.672640603614179e-06, + "loss": 0.7116, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 7.652613384775791e-06, + "loss": 0.7229, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 7.632607613299142e-06, + "loss": 0.7032, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 7.612623313918263e-06, + "loss": 0.7184, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 7.592660511340641e-06, + "loss": 0.7004, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 7.572719230247205e-06, + "loss": 0.7081, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 7.552799495292273e-06, + "loss": 0.6928, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 7.532901331103512e-06, + "loss": 0.686, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 7.513024762281914e-06, + "loss": 0.7178, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 7.493169813401799e-06, + "loss": 0.6919, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 7.473336509010742e-06, + "loss": 0.7132, + "step": 21120 + }, + { + "epoch": 1.5, + "learning_rate": 7.453524873629553e-06, + "loss": 0.7174, + "step": 21130 + }, + { + "epoch": 1.5, + "learning_rate": 7.4337349317522485e-06, + "loss": 0.7243, + "step": 21140 + }, + { + "epoch": 1.5, + "learning_rate": 7.41396670784604e-06, + "loss": 0.7158, + "step": 21150 + }, + { + "epoch": 1.5, + "learning_rate": 7.394220226351286e-06, + "loss": 0.7116, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 7.374495511681454e-06, + "loss": 0.6906, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 7.354792588223094e-06, + "loss": 0.6896, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 7.3351114803358354e-06, + "loss": 0.7078, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 7.3154522123523305e-06, + "loss": 0.7297, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 7.295814808578216e-06, + "loss": 0.6861, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 7.276199293292102e-06, + "loss": 0.6985, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 7.256605690745547e-06, + "loss": 0.7065, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 7.237034025163017e-06, + "loss": 0.7173, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 7.217484320741838e-06, + "loss": 0.7191, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 7.197956601652212e-06, + "loss": 0.7349, + "step": 21260 + }, + { + "epoch": 1.51, + "learning_rate": 7.178450892037128e-06, + "loss": 0.6995, + "step": 21270 + }, + { + "epoch": 1.51, + "learning_rate": 7.158967216012396e-06, + "loss": 0.7089, + "step": 21280 + }, + { + "epoch": 1.51, + "learning_rate": 7.139505597666557e-06, + "loss": 0.6755, + "step": 21290 + }, + { + "epoch": 1.51, + "learning_rate": 7.120066061060906e-06, + "loss": 0.6743, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 7.100648630229412e-06, + "loss": 0.7079, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 7.081253329178727e-06, + "loss": 0.7348, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 7.061880181888158e-06, + "loss": 0.7047, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 7.042529212309599e-06, + "loss": 0.7129, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 7.023200444367517e-06, + "loss": 0.6997, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 7.0038939019589605e-06, + "loss": 0.731, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 6.984609608953488e-06, + "loss": 0.7097, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 6.965347589193141e-06, + "loss": 0.7155, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 6.9461078664924216e-06, + "loss": 0.7037, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 6.926890464638277e-06, + "loss": 0.7201, + "step": 21400 + }, + { + "epoch": 1.52, + "learning_rate": 6.907695407390066e-06, + "loss": 0.7316, + "step": 21410 + }, + { + "epoch": 1.52, + "learning_rate": 6.888522718479498e-06, + "loss": 0.7124, + "step": 21420 + }, + { + "epoch": 1.52, + "learning_rate": 6.869372421610632e-06, + "loss": 0.7253, + "step": 21430 + }, + { + "epoch": 1.52, + "learning_rate": 6.85024454045986e-06, + "loss": 0.7065, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 6.831139098675854e-06, + "loss": 0.7073, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 6.812056119879534e-06, + "loss": 0.686, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 6.792995627664042e-06, + "loss": 0.6915, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 6.773957645594742e-06, + "loss": 0.7059, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 6.754942197209163e-06, + "loss": 0.7029, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 6.7359493060169475e-06, + "loss": 0.7351, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 6.716978995499887e-06, + "loss": 0.7193, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 6.698031289111825e-06, + "loss": 0.6966, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 6.679106210278682e-06, + "loss": 0.7117, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 6.660203782398383e-06, + "loss": 0.7054, + "step": 21540 + }, + { + "epoch": 1.53, + "learning_rate": 6.641324028840865e-06, + "loss": 0.712, + "step": 21550 + }, + { + "epoch": 1.53, + "learning_rate": 6.622466972948016e-06, + "loss": 0.7014, + "step": 21560 + }, + { + "epoch": 1.53, + "learning_rate": 6.603632638033683e-06, + "loss": 0.7101, + "step": 21570 + }, + { + "epoch": 1.53, + "learning_rate": 6.584821047383594e-06, + "loss": 0.7027, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 6.566032224255389e-06, + "loss": 0.7388, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 6.547266191878529e-06, + "loss": 0.6844, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 6.528522973454315e-06, + "loss": 0.6999, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 6.509802592155851e-06, + "loss": 0.7233, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 6.491105071127984e-06, + "loss": 0.6955, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 6.4724304334873e-06, + "loss": 0.7329, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 6.453778702322114e-06, + "loss": 0.7384, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 6.435149900692411e-06, + "loss": 0.6645, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 6.416544051629819e-06, + "loss": 0.7142, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 6.397961178137584e-06, + "loss": 0.7009, + "step": 21680 + }, + { + "epoch": 1.54, + "learning_rate": 6.3794013031905685e-06, + "loss": 0.6876, + "step": 21690 + }, + { + "epoch": 1.54, + "learning_rate": 6.36086444973519e-06, + "loss": 0.7037, + "step": 21700 + }, + { + "epoch": 1.54, + "learning_rate": 6.342350640689393e-06, + "loss": 0.7337, + "step": 21710 + }, + { + "epoch": 1.54, + "learning_rate": 6.323859898942649e-06, + "loss": 0.7101, + "step": 21720 + }, + { + "epoch": 1.54, + "learning_rate": 6.305392247355893e-06, + "loss": 0.7238, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 6.2869477087615315e-06, + "loss": 0.7183, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 6.268526305963374e-06, + "loss": 0.6999, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 6.250128061736646e-06, + "loss": 0.697, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 6.231752998827925e-06, + "loss": 0.7193, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 6.213401139955144e-06, + "loss": 0.7374, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 6.195072507807529e-06, + "loss": 0.7121, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 6.17676712504561e-06, + "loss": 0.6946, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 6.1584850143011546e-06, + "loss": 0.7179, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 6.140226198177176e-06, + "loss": 0.6801, + "step": 21820 + }, + { + "epoch": 1.55, + "learning_rate": 6.121990699247865e-06, + "loss": 0.7136, + "step": 21830 + }, + { + "epoch": 1.55, + "learning_rate": 6.103778540058611e-06, + "loss": 0.7195, + "step": 21840 + }, + { + "epoch": 1.55, + "learning_rate": 6.085589743125919e-06, + "loss": 0.683, + "step": 21850 + }, + { + "epoch": 1.55, + "learning_rate": 6.067424330937438e-06, + "loss": 0.7171, + "step": 21860 + }, + { + "epoch": 1.55, + "learning_rate": 6.0492823259518795e-06, + "loss": 0.7437, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 6.0311637505990394e-06, + "loss": 0.6891, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 6.013068627279725e-06, + "loss": 0.7259, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.994996978365763e-06, + "loss": 0.7382, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.97694882619996e-06, + "loss": 0.7512, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.9589241930960635e-06, + "loss": 0.7028, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.940923101338733e-06, + "loss": 0.7125, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.922945573183544e-06, + "loss": 0.707, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.90499163085694e-06, + "loss": 0.706, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.887061296556179e-06, + "loss": 0.7613, + "step": 21960 + }, + { + "epoch": 1.56, + "learning_rate": 5.869154592449364e-06, + "loss": 0.751, + "step": 21970 + }, + { + "epoch": 1.56, + "learning_rate": 5.8512715406753486e-06, + "loss": 0.7164, + "step": 21980 + }, + { + "epoch": 1.56, + "learning_rate": 5.8334121633437794e-06, + "loss": 0.7117, + "step": 21990 + }, + { + "epoch": 1.56, + "learning_rate": 5.815576482534999e-06, + "loss": 0.7227, + "step": 22000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 5.355062937843139e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-22000/training_args.bin b/checkpoint-22000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-22000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-23000/README.md b/checkpoint-23000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-23000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-23000/adapter_config.json b/checkpoint-23000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-23000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-23000/adapter_model.bin b/checkpoint-23000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..05b8ad8eec271c4fd0b6319c3ae81391697210aa --- /dev/null +++ b/checkpoint-23000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:68aa6148561123036086427e8819e253131cafd4ccda666603386b1169f24024 +size 16821197 diff --git a/checkpoint-23000/finetuning_args.json b/checkpoint-23000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-23000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-23000/optimizer.pt b/checkpoint-23000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..3f7d8ef32289559e97a563f2c4c313c63cc42cd1 --- /dev/null +++ b/checkpoint-23000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bb8d9ab33a4977be24ba4aa35e4e5a9cdd990b4d81f99a3fa5086edd5085f7ef +size 33661637 diff --git a/checkpoint-23000/rng_state.pth b/checkpoint-23000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..12ae92e33fcc24bb8ea91df309f4b3dffbdc22a3 --- /dev/null +++ b/checkpoint-23000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dcfc42a4f0ff31b3c98912ad388b936b804478bc5e34238a45e70e71021ef9e2 +size 18663 diff --git a/checkpoint-23000/scheduler.pt b/checkpoint-23000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..5571b277c69eebb604d0b57b85726cc6c628333a --- /dev/null +++ b/checkpoint-23000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa9974a942ed007ec38ad3e510f503c3144c9e61df37a438b33724af41e9602b +size 627 diff --git a/checkpoint-23000/trainer_state.json b/checkpoint-23000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..091714cd3996fd7611b496e2f43bf4efc7030fcb --- /dev/null +++ b/checkpoint-23000/trainer_state.json @@ -0,0 +1,13816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6280016280016278, + "global_step": 23000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + }, + { + "epoch": 1.49, + "learning_rate": 7.692689245053728e-06, + "loss": 0.6843, + "step": 21010 + }, + { + "epoch": 1.49, + "learning_rate": 7.672640603614179e-06, + "loss": 0.7116, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 7.652613384775791e-06, + "loss": 0.7229, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 7.632607613299142e-06, + "loss": 0.7032, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 7.612623313918263e-06, + "loss": 0.7184, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 7.592660511340641e-06, + "loss": 0.7004, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 7.572719230247205e-06, + "loss": 0.7081, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 7.552799495292273e-06, + "loss": 0.6928, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 7.532901331103512e-06, + "loss": 0.686, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 7.513024762281914e-06, + "loss": 0.7178, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 7.493169813401799e-06, + "loss": 0.6919, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 7.473336509010742e-06, + "loss": 0.7132, + "step": 21120 + }, + { + "epoch": 1.5, + "learning_rate": 7.453524873629553e-06, + "loss": 0.7174, + "step": 21130 + }, + { + "epoch": 1.5, + "learning_rate": 7.4337349317522485e-06, + "loss": 0.7243, + "step": 21140 + }, + { + "epoch": 1.5, + "learning_rate": 7.41396670784604e-06, + "loss": 0.7158, + "step": 21150 + }, + { + "epoch": 1.5, + "learning_rate": 7.394220226351286e-06, + "loss": 0.7116, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 7.374495511681454e-06, + "loss": 0.6906, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 7.354792588223094e-06, + "loss": 0.6896, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 7.3351114803358354e-06, + "loss": 0.7078, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 7.3154522123523305e-06, + "loss": 0.7297, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 7.295814808578216e-06, + "loss": 0.6861, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 7.276199293292102e-06, + "loss": 0.6985, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 7.256605690745547e-06, + "loss": 0.7065, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 7.237034025163017e-06, + "loss": 0.7173, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 7.217484320741838e-06, + "loss": 0.7191, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 7.197956601652212e-06, + "loss": 0.7349, + "step": 21260 + }, + { + "epoch": 1.51, + "learning_rate": 7.178450892037128e-06, + "loss": 0.6995, + "step": 21270 + }, + { + "epoch": 1.51, + "learning_rate": 7.158967216012396e-06, + "loss": 0.7089, + "step": 21280 + }, + { + "epoch": 1.51, + "learning_rate": 7.139505597666557e-06, + "loss": 0.6755, + "step": 21290 + }, + { + "epoch": 1.51, + "learning_rate": 7.120066061060906e-06, + "loss": 0.6743, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 7.100648630229412e-06, + "loss": 0.7079, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 7.081253329178727e-06, + "loss": 0.7348, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 7.061880181888158e-06, + "loss": 0.7047, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 7.042529212309599e-06, + "loss": 0.7129, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 7.023200444367517e-06, + "loss": 0.6997, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 7.0038939019589605e-06, + "loss": 0.731, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 6.984609608953488e-06, + "loss": 0.7097, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 6.965347589193141e-06, + "loss": 0.7155, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 6.9461078664924216e-06, + "loss": 0.7037, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 6.926890464638277e-06, + "loss": 0.7201, + "step": 21400 + }, + { + "epoch": 1.52, + "learning_rate": 6.907695407390066e-06, + "loss": 0.7316, + "step": 21410 + }, + { + "epoch": 1.52, + "learning_rate": 6.888522718479498e-06, + "loss": 0.7124, + "step": 21420 + }, + { + "epoch": 1.52, + "learning_rate": 6.869372421610632e-06, + "loss": 0.7253, + "step": 21430 + }, + { + "epoch": 1.52, + "learning_rate": 6.85024454045986e-06, + "loss": 0.7065, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 6.831139098675854e-06, + "loss": 0.7073, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 6.812056119879534e-06, + "loss": 0.686, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 6.792995627664042e-06, + "loss": 0.6915, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 6.773957645594742e-06, + "loss": 0.7059, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 6.754942197209163e-06, + "loss": 0.7029, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 6.7359493060169475e-06, + "loss": 0.7351, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 6.716978995499887e-06, + "loss": 0.7193, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 6.698031289111825e-06, + "loss": 0.6966, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 6.679106210278682e-06, + "loss": 0.7117, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 6.660203782398383e-06, + "loss": 0.7054, + "step": 21540 + }, + { + "epoch": 1.53, + "learning_rate": 6.641324028840865e-06, + "loss": 0.712, + "step": 21550 + }, + { + "epoch": 1.53, + "learning_rate": 6.622466972948016e-06, + "loss": 0.7014, + "step": 21560 + }, + { + "epoch": 1.53, + "learning_rate": 6.603632638033683e-06, + "loss": 0.7101, + "step": 21570 + }, + { + "epoch": 1.53, + "learning_rate": 6.584821047383594e-06, + "loss": 0.7027, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 6.566032224255389e-06, + "loss": 0.7388, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 6.547266191878529e-06, + "loss": 0.6844, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 6.528522973454315e-06, + "loss": 0.6999, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 6.509802592155851e-06, + "loss": 0.7233, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 6.491105071127984e-06, + "loss": 0.6955, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 6.4724304334873e-06, + "loss": 0.7329, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 6.453778702322114e-06, + "loss": 0.7384, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 6.435149900692411e-06, + "loss": 0.6645, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 6.416544051629819e-06, + "loss": 0.7142, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 6.397961178137584e-06, + "loss": 0.7009, + "step": 21680 + }, + { + "epoch": 1.54, + "learning_rate": 6.3794013031905685e-06, + "loss": 0.6876, + "step": 21690 + }, + { + "epoch": 1.54, + "learning_rate": 6.36086444973519e-06, + "loss": 0.7037, + "step": 21700 + }, + { + "epoch": 1.54, + "learning_rate": 6.342350640689393e-06, + "loss": 0.7337, + "step": 21710 + }, + { + "epoch": 1.54, + "learning_rate": 6.323859898942649e-06, + "loss": 0.7101, + "step": 21720 + }, + { + "epoch": 1.54, + "learning_rate": 6.305392247355893e-06, + "loss": 0.7238, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 6.2869477087615315e-06, + "loss": 0.7183, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 6.268526305963374e-06, + "loss": 0.6999, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 6.250128061736646e-06, + "loss": 0.697, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 6.231752998827925e-06, + "loss": 0.7193, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 6.213401139955144e-06, + "loss": 0.7374, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 6.195072507807529e-06, + "loss": 0.7121, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 6.17676712504561e-06, + "loss": 0.6946, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 6.1584850143011546e-06, + "loss": 0.7179, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 6.140226198177176e-06, + "loss": 0.6801, + "step": 21820 + }, + { + "epoch": 1.55, + "learning_rate": 6.121990699247865e-06, + "loss": 0.7136, + "step": 21830 + }, + { + "epoch": 1.55, + "learning_rate": 6.103778540058611e-06, + "loss": 0.7195, + "step": 21840 + }, + { + "epoch": 1.55, + "learning_rate": 6.085589743125919e-06, + "loss": 0.683, + "step": 21850 + }, + { + "epoch": 1.55, + "learning_rate": 6.067424330937438e-06, + "loss": 0.7171, + "step": 21860 + }, + { + "epoch": 1.55, + "learning_rate": 6.0492823259518795e-06, + "loss": 0.7437, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 6.0311637505990394e-06, + "loss": 0.6891, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 6.013068627279725e-06, + "loss": 0.7259, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.994996978365763e-06, + "loss": 0.7382, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.97694882619996e-06, + "loss": 0.7512, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.9589241930960635e-06, + "loss": 0.7028, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.940923101338733e-06, + "loss": 0.7125, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.922945573183544e-06, + "loss": 0.707, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.90499163085694e-06, + "loss": 0.706, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.887061296556179e-06, + "loss": 0.7613, + "step": 21960 + }, + { + "epoch": 1.56, + "learning_rate": 5.869154592449364e-06, + "loss": 0.751, + "step": 21970 + }, + { + "epoch": 1.56, + "learning_rate": 5.8512715406753486e-06, + "loss": 0.7164, + "step": 21980 + }, + { + "epoch": 1.56, + "learning_rate": 5.8334121633437794e-06, + "loss": 0.7117, + "step": 21990 + }, + { + "epoch": 1.56, + "learning_rate": 5.815576482534999e-06, + "loss": 0.7227, + "step": 22000 + }, + { + "epoch": 1.56, + "learning_rate": 5.797764520300083e-06, + "loss": 0.687, + "step": 22010 + }, + { + "epoch": 1.56, + "learning_rate": 5.7799762986607585e-06, + "loss": 0.6959, + "step": 22020 + }, + { + "epoch": 1.56, + "learning_rate": 5.762211839609424e-06, + "loss": 0.6949, + "step": 22030 + }, + { + "epoch": 1.56, + "learning_rate": 5.744471165109069e-06, + "loss": 0.7237, + "step": 22040 + }, + { + "epoch": 1.56, + "learning_rate": 5.726754297093315e-06, + "loss": 0.718, + "step": 22050 + }, + { + "epoch": 1.56, + "learning_rate": 5.709061257466314e-06, + "loss": 0.7166, + "step": 22060 + }, + { + "epoch": 1.56, + "learning_rate": 5.691392068102786e-06, + "loss": 0.6881, + "step": 22070 + }, + { + "epoch": 1.56, + "learning_rate": 5.673746750847938e-06, + "loss": 0.7015, + "step": 22080 + }, + { + "epoch": 1.56, + "learning_rate": 5.656125327517495e-06, + "loss": 0.7148, + "step": 22090 + }, + { + "epoch": 1.56, + "learning_rate": 5.638527819897607e-06, + "loss": 0.7374, + "step": 22100 + }, + { + "epoch": 1.57, + "learning_rate": 5.620954249744884e-06, + "loss": 0.6898, + "step": 22110 + }, + { + "epoch": 1.57, + "learning_rate": 5.6034046387863165e-06, + "loss": 0.7184, + "step": 22120 + }, + { + "epoch": 1.57, + "learning_rate": 5.585879008719297e-06, + "loss": 0.7096, + "step": 22130 + }, + { + "epoch": 1.57, + "learning_rate": 5.568377381211548e-06, + "loss": 0.6917, + "step": 22140 + }, + { + "epoch": 1.57, + "learning_rate": 5.550899777901136e-06, + "loss": 0.7112, + "step": 22150 + }, + { + "epoch": 1.57, + "learning_rate": 5.533446220396404e-06, + "loss": 0.7252, + "step": 22160 + }, + { + "epoch": 1.57, + "learning_rate": 5.5160167302759884e-06, + "loss": 0.664, + "step": 22170 + }, + { + "epoch": 1.57, + "learning_rate": 5.498611329088751e-06, + "loss": 0.7099, + "step": 22180 + }, + { + "epoch": 1.57, + "learning_rate": 5.481230038353782e-06, + "loss": 0.7, + "step": 22190 + }, + { + "epoch": 1.57, + "learning_rate": 5.463872879560366e-06, + "loss": 0.7235, + "step": 22200 + }, + { + "epoch": 1.57, + "learning_rate": 5.4465398741679386e-06, + "loss": 0.6844, + "step": 22210 + }, + { + "epoch": 1.57, + "learning_rate": 5.42923104360609e-06, + "loss": 0.7504, + "step": 22220 + }, + { + "epoch": 1.57, + "learning_rate": 5.411946409274501e-06, + "loss": 0.6676, + "step": 22230 + }, + { + "epoch": 1.57, + "learning_rate": 5.394685992542964e-06, + "loss": 0.7014, + "step": 22240 + }, + { + "epoch": 1.57, + "learning_rate": 5.377449814751304e-06, + "loss": 0.7109, + "step": 22250 + }, + { + "epoch": 1.58, + "learning_rate": 5.3602378972094e-06, + "loss": 0.7328, + "step": 22260 + }, + { + "epoch": 1.58, + "learning_rate": 5.343050261197116e-06, + "loss": 0.6915, + "step": 22270 + }, + { + "epoch": 1.58, + "learning_rate": 5.325886927964319e-06, + "loss": 0.6845, + "step": 22280 + }, + { + "epoch": 1.58, + "learning_rate": 5.308747918730806e-06, + "loss": 0.7038, + "step": 22290 + }, + { + "epoch": 1.58, + "learning_rate": 5.29163325468632e-06, + "loss": 0.6908, + "step": 22300 + }, + { + "epoch": 1.58, + "learning_rate": 5.274542956990491e-06, + "loss": 0.7001, + "step": 22310 + }, + { + "epoch": 1.58, + "learning_rate": 5.257477046772844e-06, + "loss": 0.7159, + "step": 22320 + }, + { + "epoch": 1.58, + "learning_rate": 5.240435545132716e-06, + "loss": 0.705, + "step": 22330 + }, + { + "epoch": 1.58, + "learning_rate": 5.22341847313931e-06, + "loss": 0.6825, + "step": 22340 + }, + { + "epoch": 1.58, + "learning_rate": 5.206425851831592e-06, + "loss": 0.7245, + "step": 22350 + }, + { + "epoch": 1.58, + "learning_rate": 5.18945770221832e-06, + "loss": 0.7323, + "step": 22360 + }, + { + "epoch": 1.58, + "learning_rate": 5.172514045277979e-06, + "loss": 0.7015, + "step": 22370 + }, + { + "epoch": 1.58, + "learning_rate": 5.155594901958791e-06, + "loss": 0.7121, + "step": 22380 + }, + { + "epoch": 1.58, + "learning_rate": 5.13870029317865e-06, + "loss": 0.7172, + "step": 22390 + }, + { + "epoch": 1.59, + "learning_rate": 5.12183023982514e-06, + "loss": 0.7217, + "step": 22400 + }, + { + "epoch": 1.59, + "learning_rate": 5.1049847627554634e-06, + "loss": 0.6898, + "step": 22410 + }, + { + "epoch": 1.59, + "learning_rate": 5.088163882796448e-06, + "loss": 0.699, + "step": 22420 + }, + { + "epoch": 1.59, + "learning_rate": 5.071367620744527e-06, + "loss": 0.7336, + "step": 22430 + }, + { + "epoch": 1.59, + "learning_rate": 5.054595997365671e-06, + "loss": 0.7309, + "step": 22440 + }, + { + "epoch": 1.59, + "learning_rate": 5.037849033395392e-06, + "loss": 0.6978, + "step": 22450 + }, + { + "epoch": 1.59, + "learning_rate": 5.0211267495387295e-06, + "loss": 0.7039, + "step": 22460 + }, + { + "epoch": 1.59, + "learning_rate": 5.004429166470209e-06, + "loss": 0.7153, + "step": 22470 + }, + { + "epoch": 1.59, + "learning_rate": 4.987756304833796e-06, + "loss": 0.6851, + "step": 22480 + }, + { + "epoch": 1.59, + "learning_rate": 4.972771883223115e-06, + "loss": 0.7255, + "step": 22490 + }, + { + "epoch": 1.59, + "learning_rate": 4.956146049072402e-06, + "loss": 0.7188, + "step": 22500 + }, + { + "epoch": 1.59, + "learning_rate": 4.939544996048415e-06, + "loss": 0.7236, + "step": 22510 + }, + { + "epoch": 1.59, + "learning_rate": 4.922968744675788e-06, + "loss": 0.7312, + "step": 22520 + }, + { + "epoch": 1.59, + "learning_rate": 4.9064173154485086e-06, + "loss": 0.7279, + "step": 22530 + }, + { + "epoch": 1.6, + "learning_rate": 4.889890728829832e-06, + "loss": 0.6995, + "step": 22540 + }, + { + "epoch": 1.6, + "learning_rate": 4.8733890052523434e-06, + "loss": 0.7013, + "step": 22550 + }, + { + "epoch": 1.6, + "learning_rate": 4.856912165117871e-06, + "loss": 0.6899, + "step": 22560 + }, + { + "epoch": 1.6, + "learning_rate": 4.840460228797489e-06, + "loss": 0.698, + "step": 22570 + }, + { + "epoch": 1.6, + "learning_rate": 4.824033216631463e-06, + "loss": 0.7089, + "step": 22580 + }, + { + "epoch": 1.6, + "learning_rate": 4.807631148929248e-06, + "loss": 0.718, + "step": 22590 + }, + { + "epoch": 1.6, + "learning_rate": 4.791254045969476e-06, + "loss": 0.7047, + "step": 22600 + }, + { + "epoch": 1.6, + "learning_rate": 4.774901927999906e-06, + "loss": 0.7076, + "step": 22610 + }, + { + "epoch": 1.6, + "learning_rate": 4.758574815237396e-06, + "loss": 0.7187, + "step": 22620 + }, + { + "epoch": 1.6, + "learning_rate": 4.742272727867894e-06, + "loss": 0.7161, + "step": 22630 + }, + { + "epoch": 1.6, + "learning_rate": 4.7259956860464165e-06, + "loss": 0.7227, + "step": 22640 + }, + { + "epoch": 1.6, + "learning_rate": 4.711367778983819e-06, + "loss": 0.7202, + "step": 22650 + }, + { + "epoch": 1.6, + "learning_rate": 4.695138379119721e-06, + "loss": 0.7038, + "step": 22660 + }, + { + "epoch": 1.6, + "learning_rate": 4.678934083077979e-06, + "loss": 0.7102, + "step": 22670 + }, + { + "epoch": 1.61, + "learning_rate": 4.662754910892711e-06, + "loss": 0.6974, + "step": 22680 + }, + { + "epoch": 1.61, + "learning_rate": 4.646600882566954e-06, + "loss": 0.6962, + "step": 22690 + }, + { + "epoch": 1.61, + "learning_rate": 4.630472018072659e-06, + "loss": 0.6789, + "step": 22700 + }, + { + "epoch": 1.61, + "learning_rate": 4.614368337350686e-06, + "loss": 0.7192, + "step": 22710 + }, + { + "epoch": 1.61, + "learning_rate": 4.598289860310745e-06, + "loss": 0.6817, + "step": 22720 + }, + { + "epoch": 1.61, + "learning_rate": 4.582236606831378e-06, + "loss": 0.7246, + "step": 22730 + }, + { + "epoch": 1.61, + "learning_rate": 4.566208596759963e-06, + "loss": 0.7084, + "step": 22740 + }, + { + "epoch": 1.61, + "learning_rate": 4.550205849912648e-06, + "loss": 0.691, + "step": 22750 + }, + { + "epoch": 1.61, + "learning_rate": 4.534228386074363e-06, + "loss": 0.7319, + "step": 22760 + }, + { + "epoch": 1.61, + "learning_rate": 4.51827622499876e-06, + "loss": 0.7048, + "step": 22770 + }, + { + "epoch": 1.61, + "learning_rate": 4.502349386408236e-06, + "loss": 0.7237, + "step": 22780 + }, + { + "epoch": 1.61, + "learning_rate": 4.48644788999385e-06, + "loss": 0.6948, + "step": 22790 + }, + { + "epoch": 1.61, + "learning_rate": 4.470571755415354e-06, + "loss": 0.7186, + "step": 22800 + }, + { + "epoch": 1.61, + "learning_rate": 4.454721002301127e-06, + "loss": 0.7407, + "step": 22810 + }, + { + "epoch": 1.62, + "learning_rate": 4.438895650248184e-06, + "loss": 0.7064, + "step": 22820 + }, + { + "epoch": 1.62, + "learning_rate": 4.423095718822112e-06, + "loss": 0.6924, + "step": 22830 + }, + { + "epoch": 1.62, + "learning_rate": 4.4073212275570954e-06, + "loss": 0.7243, + "step": 22840 + }, + { + "epoch": 1.62, + "learning_rate": 4.3915721959558534e-06, + "loss": 0.7193, + "step": 22850 + }, + { + "epoch": 1.62, + "learning_rate": 4.37584864348963e-06, + "loss": 0.7117, + "step": 22860 + }, + { + "epoch": 1.62, + "learning_rate": 4.360150589598156e-06, + "loss": 0.692, + "step": 22870 + }, + { + "epoch": 1.62, + "learning_rate": 4.344478053689652e-06, + "loss": 0.7245, + "step": 22880 + }, + { + "epoch": 1.62, + "learning_rate": 4.328831055140798e-06, + "loss": 0.7022, + "step": 22890 + }, + { + "epoch": 1.62, + "learning_rate": 4.313209613296679e-06, + "loss": 0.7265, + "step": 22900 + }, + { + "epoch": 1.62, + "learning_rate": 4.297613747470789e-06, + "loss": 0.7039, + "step": 22910 + }, + { + "epoch": 1.62, + "learning_rate": 4.282043476945008e-06, + "loss": 0.6811, + "step": 22920 + }, + { + "epoch": 1.62, + "learning_rate": 4.2664988209695775e-06, + "loss": 0.6649, + "step": 22930 + }, + { + "epoch": 1.62, + "learning_rate": 4.250979798763052e-06, + "loss": 0.6998, + "step": 22940 + }, + { + "epoch": 1.62, + "learning_rate": 4.2354864295123e-06, + "loss": 0.7433, + "step": 22950 + }, + { + "epoch": 1.63, + "learning_rate": 4.220018732372485e-06, + "loss": 0.7184, + "step": 22960 + }, + { + "epoch": 1.63, + "learning_rate": 4.204576726467027e-06, + "loss": 0.7101, + "step": 22970 + }, + { + "epoch": 1.63, + "learning_rate": 4.1891604308875706e-06, + "loss": 0.7722, + "step": 22980 + }, + { + "epoch": 1.63, + "learning_rate": 4.17376986469398e-06, + "loss": 0.7269, + "step": 22990 + }, + { + "epoch": 1.63, + "learning_rate": 4.158405046914315e-06, + "loss": 0.6903, + "step": 23000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 5.598704294162334e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-23000/training_args.bin b/checkpoint-23000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-23000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-24000/README.md b/checkpoint-24000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-24000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-24000/adapter_config.json b/checkpoint-24000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-24000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-24000/adapter_model.bin b/checkpoint-24000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e52ed41a906ed14daa3a30d094a49b6c3b10a19 --- /dev/null +++ b/checkpoint-24000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aa391b7ae10944e3bb72131a2b98654736283c96b9effd44bf5937c171eb55e9 +size 16821197 diff --git a/checkpoint-24000/finetuning_args.json b/checkpoint-24000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-24000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-24000/optimizer.pt b/checkpoint-24000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..7cb5f9319bb673f69bec744375dad4a397380893 --- /dev/null +++ b/checkpoint-24000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b21ce56786aa6da1e1f36e278d05864d3227c1eab3e7baf16d2ce1bea3eefaa1 +size 33661637 diff --git a/checkpoint-24000/rng_state.pth b/checkpoint-24000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..347a4833d6de78fc759ad827bc96adeb80df8b12 --- /dev/null +++ b/checkpoint-24000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1fd61a28ef6989fa244f97227c4d8c9cbee963f7a3e321250db53762bf7ef7ff +size 18663 diff --git a/checkpoint-24000/scheduler.pt b/checkpoint-24000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3c8c90d16edb6e9e485e071b0e6afb601f443bd7 --- /dev/null +++ b/checkpoint-24000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:24870dbb8f30d2f9da5fa4f8a66f37b954c9e9b2dd879b2cd07844ac2c8128ea +size 627 diff --git a/checkpoint-24000/trainer_state.json b/checkpoint-24000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..c13d79c70a1cd56b0a292d09ac1ce8d08ce5a76c --- /dev/null +++ b/checkpoint-24000/trainer_state.json @@ -0,0 +1,14416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.6987843074799596, + "global_step": 24000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + }, + { + "epoch": 1.49, + "learning_rate": 7.692689245053728e-06, + "loss": 0.6843, + "step": 21010 + }, + { + "epoch": 1.49, + "learning_rate": 7.672640603614179e-06, + "loss": 0.7116, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 7.652613384775791e-06, + "loss": 0.7229, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 7.632607613299142e-06, + "loss": 0.7032, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 7.612623313918263e-06, + "loss": 0.7184, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 7.592660511340641e-06, + "loss": 0.7004, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 7.572719230247205e-06, + "loss": 0.7081, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 7.552799495292273e-06, + "loss": 0.6928, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 7.532901331103512e-06, + "loss": 0.686, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 7.513024762281914e-06, + "loss": 0.7178, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 7.493169813401799e-06, + "loss": 0.6919, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 7.473336509010742e-06, + "loss": 0.7132, + "step": 21120 + }, + { + "epoch": 1.5, + "learning_rate": 7.453524873629553e-06, + "loss": 0.7174, + "step": 21130 + }, + { + "epoch": 1.5, + "learning_rate": 7.4337349317522485e-06, + "loss": 0.7243, + "step": 21140 + }, + { + "epoch": 1.5, + "learning_rate": 7.41396670784604e-06, + "loss": 0.7158, + "step": 21150 + }, + { + "epoch": 1.5, + "learning_rate": 7.394220226351286e-06, + "loss": 0.7116, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 7.374495511681454e-06, + "loss": 0.6906, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 7.354792588223094e-06, + "loss": 0.6896, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 7.3351114803358354e-06, + "loss": 0.7078, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 7.3154522123523305e-06, + "loss": 0.7297, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 7.295814808578216e-06, + "loss": 0.6861, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 7.276199293292102e-06, + "loss": 0.6985, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 7.256605690745547e-06, + "loss": 0.7065, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 7.237034025163017e-06, + "loss": 0.7173, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 7.217484320741838e-06, + "loss": 0.7191, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 7.197956601652212e-06, + "loss": 0.7349, + "step": 21260 + }, + { + "epoch": 1.51, + "learning_rate": 7.178450892037128e-06, + "loss": 0.6995, + "step": 21270 + }, + { + "epoch": 1.51, + "learning_rate": 7.158967216012396e-06, + "loss": 0.7089, + "step": 21280 + }, + { + "epoch": 1.51, + "learning_rate": 7.139505597666557e-06, + "loss": 0.6755, + "step": 21290 + }, + { + "epoch": 1.51, + "learning_rate": 7.120066061060906e-06, + "loss": 0.6743, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 7.100648630229412e-06, + "loss": 0.7079, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 7.081253329178727e-06, + "loss": 0.7348, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 7.061880181888158e-06, + "loss": 0.7047, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 7.042529212309599e-06, + "loss": 0.7129, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 7.023200444367517e-06, + "loss": 0.6997, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 7.0038939019589605e-06, + "loss": 0.731, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 6.984609608953488e-06, + "loss": 0.7097, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 6.965347589193141e-06, + "loss": 0.7155, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 6.9461078664924216e-06, + "loss": 0.7037, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 6.926890464638277e-06, + "loss": 0.7201, + "step": 21400 + }, + { + "epoch": 1.52, + "learning_rate": 6.907695407390066e-06, + "loss": 0.7316, + "step": 21410 + }, + { + "epoch": 1.52, + "learning_rate": 6.888522718479498e-06, + "loss": 0.7124, + "step": 21420 + }, + { + "epoch": 1.52, + "learning_rate": 6.869372421610632e-06, + "loss": 0.7253, + "step": 21430 + }, + { + "epoch": 1.52, + "learning_rate": 6.85024454045986e-06, + "loss": 0.7065, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 6.831139098675854e-06, + "loss": 0.7073, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 6.812056119879534e-06, + "loss": 0.686, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 6.792995627664042e-06, + "loss": 0.6915, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 6.773957645594742e-06, + "loss": 0.7059, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 6.754942197209163e-06, + "loss": 0.7029, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 6.7359493060169475e-06, + "loss": 0.7351, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 6.716978995499887e-06, + "loss": 0.7193, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 6.698031289111825e-06, + "loss": 0.6966, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 6.679106210278682e-06, + "loss": 0.7117, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 6.660203782398383e-06, + "loss": 0.7054, + "step": 21540 + }, + { + "epoch": 1.53, + "learning_rate": 6.641324028840865e-06, + "loss": 0.712, + "step": 21550 + }, + { + "epoch": 1.53, + "learning_rate": 6.622466972948016e-06, + "loss": 0.7014, + "step": 21560 + }, + { + "epoch": 1.53, + "learning_rate": 6.603632638033683e-06, + "loss": 0.7101, + "step": 21570 + }, + { + "epoch": 1.53, + "learning_rate": 6.584821047383594e-06, + "loss": 0.7027, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 6.566032224255389e-06, + "loss": 0.7388, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 6.547266191878529e-06, + "loss": 0.6844, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 6.528522973454315e-06, + "loss": 0.6999, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 6.509802592155851e-06, + "loss": 0.7233, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 6.491105071127984e-06, + "loss": 0.6955, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 6.4724304334873e-06, + "loss": 0.7329, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 6.453778702322114e-06, + "loss": 0.7384, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 6.435149900692411e-06, + "loss": 0.6645, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 6.416544051629819e-06, + "loss": 0.7142, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 6.397961178137584e-06, + "loss": 0.7009, + "step": 21680 + }, + { + "epoch": 1.54, + "learning_rate": 6.3794013031905685e-06, + "loss": 0.6876, + "step": 21690 + }, + { + "epoch": 1.54, + "learning_rate": 6.36086444973519e-06, + "loss": 0.7037, + "step": 21700 + }, + { + "epoch": 1.54, + "learning_rate": 6.342350640689393e-06, + "loss": 0.7337, + "step": 21710 + }, + { + "epoch": 1.54, + "learning_rate": 6.323859898942649e-06, + "loss": 0.7101, + "step": 21720 + }, + { + "epoch": 1.54, + "learning_rate": 6.305392247355893e-06, + "loss": 0.7238, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 6.2869477087615315e-06, + "loss": 0.7183, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 6.268526305963374e-06, + "loss": 0.6999, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 6.250128061736646e-06, + "loss": 0.697, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 6.231752998827925e-06, + "loss": 0.7193, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 6.213401139955144e-06, + "loss": 0.7374, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 6.195072507807529e-06, + "loss": 0.7121, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 6.17676712504561e-06, + "loss": 0.6946, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 6.1584850143011546e-06, + "loss": 0.7179, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 6.140226198177176e-06, + "loss": 0.6801, + "step": 21820 + }, + { + "epoch": 1.55, + "learning_rate": 6.121990699247865e-06, + "loss": 0.7136, + "step": 21830 + }, + { + "epoch": 1.55, + "learning_rate": 6.103778540058611e-06, + "loss": 0.7195, + "step": 21840 + }, + { + "epoch": 1.55, + "learning_rate": 6.085589743125919e-06, + "loss": 0.683, + "step": 21850 + }, + { + "epoch": 1.55, + "learning_rate": 6.067424330937438e-06, + "loss": 0.7171, + "step": 21860 + }, + { + "epoch": 1.55, + "learning_rate": 6.0492823259518795e-06, + "loss": 0.7437, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 6.0311637505990394e-06, + "loss": 0.6891, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 6.013068627279725e-06, + "loss": 0.7259, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.994996978365763e-06, + "loss": 0.7382, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.97694882619996e-06, + "loss": 0.7512, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.9589241930960635e-06, + "loss": 0.7028, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.940923101338733e-06, + "loss": 0.7125, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.922945573183544e-06, + "loss": 0.707, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.90499163085694e-06, + "loss": 0.706, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.887061296556179e-06, + "loss": 0.7613, + "step": 21960 + }, + { + "epoch": 1.56, + "learning_rate": 5.869154592449364e-06, + "loss": 0.751, + "step": 21970 + }, + { + "epoch": 1.56, + "learning_rate": 5.8512715406753486e-06, + "loss": 0.7164, + "step": 21980 + }, + { + "epoch": 1.56, + "learning_rate": 5.8334121633437794e-06, + "loss": 0.7117, + "step": 21990 + }, + { + "epoch": 1.56, + "learning_rate": 5.815576482534999e-06, + "loss": 0.7227, + "step": 22000 + }, + { + "epoch": 1.56, + "learning_rate": 5.797764520300083e-06, + "loss": 0.687, + "step": 22010 + }, + { + "epoch": 1.56, + "learning_rate": 5.7799762986607585e-06, + "loss": 0.6959, + "step": 22020 + }, + { + "epoch": 1.56, + "learning_rate": 5.762211839609424e-06, + "loss": 0.6949, + "step": 22030 + }, + { + "epoch": 1.56, + "learning_rate": 5.744471165109069e-06, + "loss": 0.7237, + "step": 22040 + }, + { + "epoch": 1.56, + "learning_rate": 5.726754297093315e-06, + "loss": 0.718, + "step": 22050 + }, + { + "epoch": 1.56, + "learning_rate": 5.709061257466314e-06, + "loss": 0.7166, + "step": 22060 + }, + { + "epoch": 1.56, + "learning_rate": 5.691392068102786e-06, + "loss": 0.6881, + "step": 22070 + }, + { + "epoch": 1.56, + "learning_rate": 5.673746750847938e-06, + "loss": 0.7015, + "step": 22080 + }, + { + "epoch": 1.56, + "learning_rate": 5.656125327517495e-06, + "loss": 0.7148, + "step": 22090 + }, + { + "epoch": 1.56, + "learning_rate": 5.638527819897607e-06, + "loss": 0.7374, + "step": 22100 + }, + { + "epoch": 1.57, + "learning_rate": 5.620954249744884e-06, + "loss": 0.6898, + "step": 22110 + }, + { + "epoch": 1.57, + "learning_rate": 5.6034046387863165e-06, + "loss": 0.7184, + "step": 22120 + }, + { + "epoch": 1.57, + "learning_rate": 5.585879008719297e-06, + "loss": 0.7096, + "step": 22130 + }, + { + "epoch": 1.57, + "learning_rate": 5.568377381211548e-06, + "loss": 0.6917, + "step": 22140 + }, + { + "epoch": 1.57, + "learning_rate": 5.550899777901136e-06, + "loss": 0.7112, + "step": 22150 + }, + { + "epoch": 1.57, + "learning_rate": 5.533446220396404e-06, + "loss": 0.7252, + "step": 22160 + }, + { + "epoch": 1.57, + "learning_rate": 5.5160167302759884e-06, + "loss": 0.664, + "step": 22170 + }, + { + "epoch": 1.57, + "learning_rate": 5.498611329088751e-06, + "loss": 0.7099, + "step": 22180 + }, + { + "epoch": 1.57, + "learning_rate": 5.481230038353782e-06, + "loss": 0.7, + "step": 22190 + }, + { + "epoch": 1.57, + "learning_rate": 5.463872879560366e-06, + "loss": 0.7235, + "step": 22200 + }, + { + "epoch": 1.57, + "learning_rate": 5.4465398741679386e-06, + "loss": 0.6844, + "step": 22210 + }, + { + "epoch": 1.57, + "learning_rate": 5.42923104360609e-06, + "loss": 0.7504, + "step": 22220 + }, + { + "epoch": 1.57, + "learning_rate": 5.411946409274501e-06, + "loss": 0.6676, + "step": 22230 + }, + { + "epoch": 1.57, + "learning_rate": 5.394685992542964e-06, + "loss": 0.7014, + "step": 22240 + }, + { + "epoch": 1.57, + "learning_rate": 5.377449814751304e-06, + "loss": 0.7109, + "step": 22250 + }, + { + "epoch": 1.58, + "learning_rate": 5.3602378972094e-06, + "loss": 0.7328, + "step": 22260 + }, + { + "epoch": 1.58, + "learning_rate": 5.343050261197116e-06, + "loss": 0.6915, + "step": 22270 + }, + { + "epoch": 1.58, + "learning_rate": 5.325886927964319e-06, + "loss": 0.6845, + "step": 22280 + }, + { + "epoch": 1.58, + "learning_rate": 5.308747918730806e-06, + "loss": 0.7038, + "step": 22290 + }, + { + "epoch": 1.58, + "learning_rate": 5.29163325468632e-06, + "loss": 0.6908, + "step": 22300 + }, + { + "epoch": 1.58, + "learning_rate": 5.274542956990491e-06, + "loss": 0.7001, + "step": 22310 + }, + { + "epoch": 1.58, + "learning_rate": 5.257477046772844e-06, + "loss": 0.7159, + "step": 22320 + }, + { + "epoch": 1.58, + "learning_rate": 5.240435545132716e-06, + "loss": 0.705, + "step": 22330 + }, + { + "epoch": 1.58, + "learning_rate": 5.22341847313931e-06, + "loss": 0.6825, + "step": 22340 + }, + { + "epoch": 1.58, + "learning_rate": 5.206425851831592e-06, + "loss": 0.7245, + "step": 22350 + }, + { + "epoch": 1.58, + "learning_rate": 5.18945770221832e-06, + "loss": 0.7323, + "step": 22360 + }, + { + "epoch": 1.58, + "learning_rate": 5.172514045277979e-06, + "loss": 0.7015, + "step": 22370 + }, + { + "epoch": 1.58, + "learning_rate": 5.155594901958791e-06, + "loss": 0.7121, + "step": 22380 + }, + { + "epoch": 1.58, + "learning_rate": 5.13870029317865e-06, + "loss": 0.7172, + "step": 22390 + }, + { + "epoch": 1.59, + "learning_rate": 5.12183023982514e-06, + "loss": 0.7217, + "step": 22400 + }, + { + "epoch": 1.59, + "learning_rate": 5.1049847627554634e-06, + "loss": 0.6898, + "step": 22410 + }, + { + "epoch": 1.59, + "learning_rate": 5.088163882796448e-06, + "loss": 0.699, + "step": 22420 + }, + { + "epoch": 1.59, + "learning_rate": 5.071367620744527e-06, + "loss": 0.7336, + "step": 22430 + }, + { + "epoch": 1.59, + "learning_rate": 5.054595997365671e-06, + "loss": 0.7309, + "step": 22440 + }, + { + "epoch": 1.59, + "learning_rate": 5.037849033395392e-06, + "loss": 0.6978, + "step": 22450 + }, + { + "epoch": 1.59, + "learning_rate": 5.0211267495387295e-06, + "loss": 0.7039, + "step": 22460 + }, + { + "epoch": 1.59, + "learning_rate": 5.004429166470209e-06, + "loss": 0.7153, + "step": 22470 + }, + { + "epoch": 1.59, + "learning_rate": 4.987756304833796e-06, + "loss": 0.6851, + "step": 22480 + }, + { + "epoch": 1.59, + "learning_rate": 4.972771883223115e-06, + "loss": 0.7255, + "step": 22490 + }, + { + "epoch": 1.59, + "learning_rate": 4.956146049072402e-06, + "loss": 0.7188, + "step": 22500 + }, + { + "epoch": 1.59, + "learning_rate": 4.939544996048415e-06, + "loss": 0.7236, + "step": 22510 + }, + { + "epoch": 1.59, + "learning_rate": 4.922968744675788e-06, + "loss": 0.7312, + "step": 22520 + }, + { + "epoch": 1.59, + "learning_rate": 4.9064173154485086e-06, + "loss": 0.7279, + "step": 22530 + }, + { + "epoch": 1.6, + "learning_rate": 4.889890728829832e-06, + "loss": 0.6995, + "step": 22540 + }, + { + "epoch": 1.6, + "learning_rate": 4.8733890052523434e-06, + "loss": 0.7013, + "step": 22550 + }, + { + "epoch": 1.6, + "learning_rate": 4.856912165117871e-06, + "loss": 0.6899, + "step": 22560 + }, + { + "epoch": 1.6, + "learning_rate": 4.840460228797489e-06, + "loss": 0.698, + "step": 22570 + }, + { + "epoch": 1.6, + "learning_rate": 4.824033216631463e-06, + "loss": 0.7089, + "step": 22580 + }, + { + "epoch": 1.6, + "learning_rate": 4.807631148929248e-06, + "loss": 0.718, + "step": 22590 + }, + { + "epoch": 1.6, + "learning_rate": 4.791254045969476e-06, + "loss": 0.7047, + "step": 22600 + }, + { + "epoch": 1.6, + "learning_rate": 4.774901927999906e-06, + "loss": 0.7076, + "step": 22610 + }, + { + "epoch": 1.6, + "learning_rate": 4.758574815237396e-06, + "loss": 0.7187, + "step": 22620 + }, + { + "epoch": 1.6, + "learning_rate": 4.742272727867894e-06, + "loss": 0.7161, + "step": 22630 + }, + { + "epoch": 1.6, + "learning_rate": 4.7259956860464165e-06, + "loss": 0.7227, + "step": 22640 + }, + { + "epoch": 1.6, + "learning_rate": 4.711367778983819e-06, + "loss": 0.7202, + "step": 22650 + }, + { + "epoch": 1.6, + "learning_rate": 4.695138379119721e-06, + "loss": 0.7038, + "step": 22660 + }, + { + "epoch": 1.6, + "learning_rate": 4.678934083077979e-06, + "loss": 0.7102, + "step": 22670 + }, + { + "epoch": 1.61, + "learning_rate": 4.662754910892711e-06, + "loss": 0.6974, + "step": 22680 + }, + { + "epoch": 1.61, + "learning_rate": 4.646600882566954e-06, + "loss": 0.6962, + "step": 22690 + }, + { + "epoch": 1.61, + "learning_rate": 4.630472018072659e-06, + "loss": 0.6789, + "step": 22700 + }, + { + "epoch": 1.61, + "learning_rate": 4.614368337350686e-06, + "loss": 0.7192, + "step": 22710 + }, + { + "epoch": 1.61, + "learning_rate": 4.598289860310745e-06, + "loss": 0.6817, + "step": 22720 + }, + { + "epoch": 1.61, + "learning_rate": 4.582236606831378e-06, + "loss": 0.7246, + "step": 22730 + }, + { + "epoch": 1.61, + "learning_rate": 4.566208596759963e-06, + "loss": 0.7084, + "step": 22740 + }, + { + "epoch": 1.61, + "learning_rate": 4.550205849912648e-06, + "loss": 0.691, + "step": 22750 + }, + { + "epoch": 1.61, + "learning_rate": 4.534228386074363e-06, + "loss": 0.7319, + "step": 22760 + }, + { + "epoch": 1.61, + "learning_rate": 4.51827622499876e-06, + "loss": 0.7048, + "step": 22770 + }, + { + "epoch": 1.61, + "learning_rate": 4.502349386408236e-06, + "loss": 0.7237, + "step": 22780 + }, + { + "epoch": 1.61, + "learning_rate": 4.48644788999385e-06, + "loss": 0.6948, + "step": 22790 + }, + { + "epoch": 1.61, + "learning_rate": 4.470571755415354e-06, + "loss": 0.7186, + "step": 22800 + }, + { + "epoch": 1.61, + "learning_rate": 4.454721002301127e-06, + "loss": 0.7407, + "step": 22810 + }, + { + "epoch": 1.62, + "learning_rate": 4.438895650248184e-06, + "loss": 0.7064, + "step": 22820 + }, + { + "epoch": 1.62, + "learning_rate": 4.423095718822112e-06, + "loss": 0.6924, + "step": 22830 + }, + { + "epoch": 1.62, + "learning_rate": 4.4073212275570954e-06, + "loss": 0.7243, + "step": 22840 + }, + { + "epoch": 1.62, + "learning_rate": 4.3915721959558534e-06, + "loss": 0.7193, + "step": 22850 + }, + { + "epoch": 1.62, + "learning_rate": 4.37584864348963e-06, + "loss": 0.7117, + "step": 22860 + }, + { + "epoch": 1.62, + "learning_rate": 4.360150589598156e-06, + "loss": 0.692, + "step": 22870 + }, + { + "epoch": 1.62, + "learning_rate": 4.344478053689652e-06, + "loss": 0.7245, + "step": 22880 + }, + { + "epoch": 1.62, + "learning_rate": 4.328831055140798e-06, + "loss": 0.7022, + "step": 22890 + }, + { + "epoch": 1.62, + "learning_rate": 4.313209613296679e-06, + "loss": 0.7265, + "step": 22900 + }, + { + "epoch": 1.62, + "learning_rate": 4.297613747470789e-06, + "loss": 0.7039, + "step": 22910 + }, + { + "epoch": 1.62, + "learning_rate": 4.282043476945008e-06, + "loss": 0.6811, + "step": 22920 + }, + { + "epoch": 1.62, + "learning_rate": 4.2664988209695775e-06, + "loss": 0.6649, + "step": 22930 + }, + { + "epoch": 1.62, + "learning_rate": 4.250979798763052e-06, + "loss": 0.6998, + "step": 22940 + }, + { + "epoch": 1.62, + "learning_rate": 4.2354864295123e-06, + "loss": 0.7433, + "step": 22950 + }, + { + "epoch": 1.63, + "learning_rate": 4.220018732372485e-06, + "loss": 0.7184, + "step": 22960 + }, + { + "epoch": 1.63, + "learning_rate": 4.204576726467027e-06, + "loss": 0.7101, + "step": 22970 + }, + { + "epoch": 1.63, + "learning_rate": 4.1891604308875706e-06, + "loss": 0.7722, + "step": 22980 + }, + { + "epoch": 1.63, + "learning_rate": 4.17376986469398e-06, + "loss": 0.7269, + "step": 22990 + }, + { + "epoch": 1.63, + "learning_rate": 4.158405046914315e-06, + "loss": 0.6903, + "step": 23000 + }, + { + "epoch": 1.63, + "learning_rate": 4.143065996544804e-06, + "loss": 0.7359, + "step": 23010 + }, + { + "epoch": 1.63, + "learning_rate": 4.1277527325498e-06, + "loss": 0.6894, + "step": 23020 + }, + { + "epoch": 1.63, + "learning_rate": 4.112465273861799e-06, + "loss": 0.7237, + "step": 23030 + }, + { + "epoch": 1.63, + "learning_rate": 4.097203639381364e-06, + "loss": 0.7028, + "step": 23040 + }, + { + "epoch": 1.63, + "learning_rate": 4.081967847977164e-06, + "loss": 0.7038, + "step": 23050 + }, + { + "epoch": 1.63, + "learning_rate": 4.066757918485886e-06, + "loss": 0.711, + "step": 23060 + }, + { + "epoch": 1.63, + "learning_rate": 4.0515738697122694e-06, + "loss": 0.717, + "step": 23070 + }, + { + "epoch": 1.63, + "learning_rate": 4.036415720429027e-06, + "loss": 0.7134, + "step": 23080 + }, + { + "epoch": 1.63, + "learning_rate": 4.021283489376876e-06, + "loss": 0.709, + "step": 23090 + }, + { + "epoch": 1.64, + "learning_rate": 4.006177195264488e-06, + "loss": 0.7266, + "step": 23100 + }, + { + "epoch": 1.64, + "learning_rate": 3.9910968567684506e-06, + "loss": 0.6872, + "step": 23110 + }, + { + "epoch": 1.64, + "learning_rate": 3.976042492533269e-06, + "loss": 0.7256, + "step": 23120 + }, + { + "epoch": 1.64, + "learning_rate": 3.961014121171342e-06, + "loss": 0.7437, + "step": 23130 + }, + { + "epoch": 1.64, + "learning_rate": 3.946011761262932e-06, + "loss": 0.7111, + "step": 23140 + }, + { + "epoch": 1.64, + "learning_rate": 3.931035431356134e-06, + "loss": 0.697, + "step": 23150 + }, + { + "epoch": 1.64, + "learning_rate": 3.916085149966856e-06, + "loss": 0.7258, + "step": 23160 + }, + { + "epoch": 1.64, + "learning_rate": 3.901160935578815e-06, + "loss": 0.7029, + "step": 23170 + }, + { + "epoch": 1.64, + "learning_rate": 3.8862628066435065e-06, + "loss": 0.686, + "step": 23180 + }, + { + "epoch": 1.64, + "learning_rate": 3.8713907815801534e-06, + "loss": 0.6994, + "step": 23190 + }, + { + "epoch": 1.64, + "learning_rate": 3.856544878775708e-06, + "loss": 0.7039, + "step": 23200 + }, + { + "epoch": 1.64, + "learning_rate": 3.841725116584846e-06, + "loss": 0.7096, + "step": 23210 + }, + { + "epoch": 1.64, + "learning_rate": 3.8269315133299126e-06, + "loss": 0.7029, + "step": 23220 + }, + { + "epoch": 1.64, + "learning_rate": 3.8121640873009067e-06, + "loss": 0.7133, + "step": 23230 + }, + { + "epoch": 1.64, + "learning_rate": 3.7974228567554617e-06, + "loss": 0.7054, + "step": 23240 + }, + { + "epoch": 1.65, + "learning_rate": 3.7827078399188393e-06, + "loss": 0.7077, + "step": 23250 + }, + { + "epoch": 1.65, + "learning_rate": 3.7680190549838847e-06, + "loss": 0.6985, + "step": 23260 + }, + { + "epoch": 1.65, + "learning_rate": 3.753356520111004e-06, + "loss": 0.7222, + "step": 23270 + }, + { + "epoch": 1.65, + "learning_rate": 3.738720253428152e-06, + "loss": 0.7174, + "step": 23280 + }, + { + "epoch": 1.65, + "learning_rate": 3.724110273030812e-06, + "loss": 0.6935, + "step": 23290 + }, + { + "epoch": 1.65, + "learning_rate": 3.709526596981977e-06, + "loss": 0.7094, + "step": 23300 + }, + { + "epoch": 1.65, + "learning_rate": 3.6949692433120907e-06, + "loss": 0.7029, + "step": 23310 + }, + { + "epoch": 1.65, + "learning_rate": 3.6804382300190844e-06, + "loss": 0.7145, + "step": 23320 + }, + { + "epoch": 1.65, + "learning_rate": 3.665933575068298e-06, + "loss": 0.7247, + "step": 23330 + }, + { + "epoch": 1.65, + "learning_rate": 3.6514552963925004e-06, + "loss": 0.7393, + "step": 23340 + }, + { + "epoch": 1.65, + "learning_rate": 3.637003411891854e-06, + "loss": 0.7352, + "step": 23350 + }, + { + "epoch": 1.65, + "learning_rate": 3.622577939433866e-06, + "loss": 0.6873, + "step": 23360 + }, + { + "epoch": 1.65, + "learning_rate": 3.6081788968534066e-06, + "loss": 0.7056, + "step": 23370 + }, + { + "epoch": 1.65, + "learning_rate": 3.5938063019526653e-06, + "loss": 0.7287, + "step": 23380 + }, + { + "epoch": 1.66, + "learning_rate": 3.579460172501142e-06, + "loss": 0.717, + "step": 23390 + }, + { + "epoch": 1.66, + "learning_rate": 3.5651405262356024e-06, + "loss": 0.7258, + "step": 23400 + }, + { + "epoch": 1.66, + "learning_rate": 3.5508473808600674e-06, + "loss": 0.6985, + "step": 23410 + }, + { + "epoch": 1.66, + "learning_rate": 3.5365807540458097e-06, + "loss": 0.7059, + "step": 23420 + }, + { + "epoch": 1.66, + "learning_rate": 3.522340663431314e-06, + "loss": 0.7047, + "step": 23430 + }, + { + "epoch": 1.66, + "learning_rate": 3.5081271266222434e-06, + "loss": 0.7064, + "step": 23440 + }, + { + "epoch": 1.66, + "learning_rate": 3.4939401611914337e-06, + "loss": 0.6804, + "step": 23450 + }, + { + "epoch": 1.66, + "learning_rate": 3.479779784678877e-06, + "loss": 0.7099, + "step": 23460 + }, + { + "epoch": 1.66, + "learning_rate": 3.465646014591703e-06, + "loss": 0.7182, + "step": 23470 + }, + { + "epoch": 1.66, + "learning_rate": 3.4515388684041193e-06, + "loss": 0.6964, + "step": 23480 + }, + { + "epoch": 1.66, + "learning_rate": 3.437458363557433e-06, + "loss": 0.707, + "step": 23490 + }, + { + "epoch": 1.66, + "learning_rate": 3.4234045174600103e-06, + "loss": 0.6729, + "step": 23500 + }, + { + "epoch": 1.66, + "learning_rate": 3.409377347487272e-06, + "loss": 0.6822, + "step": 23510 + }, + { + "epoch": 1.66, + "learning_rate": 3.395376870981634e-06, + "loss": 0.7339, + "step": 23520 + }, + { + "epoch": 1.67, + "learning_rate": 3.3814031052525175e-06, + "loss": 0.7011, + "step": 23530 + }, + { + "epoch": 1.67, + "learning_rate": 3.367456067576327e-06, + "loss": 0.7216, + "step": 23540 + }, + { + "epoch": 1.67, + "learning_rate": 3.353535775196423e-06, + "loss": 0.7194, + "step": 23550 + }, + { + "epoch": 1.67, + "learning_rate": 3.339642245323102e-06, + "loss": 0.7163, + "step": 23560 + }, + { + "epoch": 1.67, + "learning_rate": 3.325775495133546e-06, + "loss": 0.736, + "step": 23570 + }, + { + "epoch": 1.67, + "learning_rate": 3.31193554177186e-06, + "loss": 0.7071, + "step": 23580 + }, + { + "epoch": 1.67, + "learning_rate": 3.298122402349002e-06, + "loss": 0.6889, + "step": 23590 + }, + { + "epoch": 1.67, + "learning_rate": 3.2843360939427943e-06, + "loss": 0.6933, + "step": 23600 + }, + { + "epoch": 1.67, + "learning_rate": 3.270576633597866e-06, + "loss": 0.699, + "step": 23610 + }, + { + "epoch": 1.67, + "learning_rate": 3.2568440383256598e-06, + "loss": 0.6603, + "step": 23620 + }, + { + "epoch": 1.67, + "learning_rate": 3.243138325104411e-06, + "loss": 0.6971, + "step": 23630 + }, + { + "epoch": 1.67, + "learning_rate": 3.230826181364585e-06, + "loss": 0.7269, + "step": 23640 + }, + { + "epoch": 1.67, + "learning_rate": 3.217171590696141e-06, + "loss": 0.7277, + "step": 23650 + }, + { + "epoch": 1.67, + "learning_rate": 3.2035439311275006e-06, + "loss": 0.6943, + "step": 23660 + }, + { + "epoch": 1.68, + "learning_rate": 3.1899432195071575e-06, + "loss": 0.7204, + "step": 23670 + }, + { + "epoch": 1.68, + "learning_rate": 3.176369472650292e-06, + "loss": 0.7241, + "step": 23680 + }, + { + "epoch": 1.68, + "learning_rate": 3.1628227073387474e-06, + "loss": 0.6712, + "step": 23690 + }, + { + "epoch": 1.68, + "learning_rate": 3.1493029403209973e-06, + "loss": 0.6877, + "step": 23700 + }, + { + "epoch": 1.68, + "learning_rate": 3.1358101883121547e-06, + "loss": 0.6953, + "step": 23710 + }, + { + "epoch": 1.68, + "learning_rate": 3.122344467993907e-06, + "loss": 0.6918, + "step": 23720 + }, + { + "epoch": 1.68, + "learning_rate": 3.1089057960145498e-06, + "loss": 0.6866, + "step": 23730 + }, + { + "epoch": 1.68, + "learning_rate": 3.0954941889889096e-06, + "loss": 0.6975, + "step": 23740 + }, + { + "epoch": 1.68, + "learning_rate": 3.082109663498378e-06, + "loss": 0.7213, + "step": 23750 + }, + { + "epoch": 1.68, + "learning_rate": 3.068752236090841e-06, + "loss": 0.7225, + "step": 23760 + }, + { + "epoch": 1.68, + "learning_rate": 3.055421923280702e-06, + "loss": 0.7064, + "step": 23770 + }, + { + "epoch": 1.68, + "learning_rate": 3.0421187415488246e-06, + "loss": 0.696, + "step": 23780 + }, + { + "epoch": 1.68, + "learning_rate": 3.028842707342541e-06, + "loss": 0.7251, + "step": 23790 + }, + { + "epoch": 1.68, + "learning_rate": 3.0155938370756116e-06, + "loss": 0.7075, + "step": 23800 + }, + { + "epoch": 1.69, + "learning_rate": 3.0023721471282214e-06, + "loss": 0.7181, + "step": 23810 + }, + { + "epoch": 1.69, + "learning_rate": 2.9891776538469362e-06, + "loss": 0.6982, + "step": 23820 + }, + { + "epoch": 1.69, + "learning_rate": 2.9760103735447186e-06, + "loss": 0.6984, + "step": 23830 + }, + { + "epoch": 1.69, + "learning_rate": 2.962870322500866e-06, + "loss": 0.7363, + "step": 23840 + }, + { + "epoch": 1.69, + "learning_rate": 2.9497575169610243e-06, + "loss": 0.6743, + "step": 23850 + }, + { + "epoch": 1.69, + "learning_rate": 2.9366719731371563e-06, + "loss": 0.7141, + "step": 23860 + }, + { + "epoch": 1.69, + "learning_rate": 2.9236137072075067e-06, + "loss": 0.7228, + "step": 23870 + }, + { + "epoch": 1.69, + "learning_rate": 2.910582735316597e-06, + "loss": 0.7028, + "step": 23880 + }, + { + "epoch": 1.69, + "learning_rate": 2.8975790735752186e-06, + "loss": 0.7098, + "step": 23890 + }, + { + "epoch": 1.69, + "learning_rate": 2.8846027380603908e-06, + "loss": 0.6907, + "step": 23900 + }, + { + "epoch": 1.69, + "learning_rate": 2.8716537448153364e-06, + "loss": 0.7226, + "step": 23910 + }, + { + "epoch": 1.69, + "learning_rate": 2.8587321098494963e-06, + "loss": 0.7298, + "step": 23920 + }, + { + "epoch": 1.69, + "learning_rate": 2.8458378491384606e-06, + "loss": 0.7172, + "step": 23930 + }, + { + "epoch": 1.69, + "learning_rate": 2.832970978624e-06, + "loss": 0.7065, + "step": 23940 + }, + { + "epoch": 1.7, + "learning_rate": 2.8201315142140055e-06, + "loss": 0.6787, + "step": 23950 + }, + { + "epoch": 1.7, + "learning_rate": 2.8073194717824935e-06, + "loss": 0.6846, + "step": 23960 + }, + { + "epoch": 1.7, + "learning_rate": 2.794534867169568e-06, + "loss": 0.7014, + "step": 23970 + }, + { + "epoch": 1.7, + "learning_rate": 2.7817777161814245e-06, + "loss": 0.721, + "step": 23980 + }, + { + "epoch": 1.7, + "learning_rate": 2.769048034590299e-06, + "loss": 0.7459, + "step": 23990 + }, + { + "epoch": 1.7, + "learning_rate": 2.7563458381344858e-06, + "loss": 0.6801, + "step": 24000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 5.841638666115482e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-24000/training_args.bin b/checkpoint-24000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-24000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-25000/README.md b/checkpoint-25000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-25000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-25000/adapter_config.json b/checkpoint-25000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-25000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-25000/adapter_model.bin b/checkpoint-25000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..8a52e07473003cbbae4bbb0dd1846a3e3a8f30cc --- /dev/null +++ b/checkpoint-25000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7d21dc36ff8a031a6f922885371e42941bb35b6c5ce31aadcc813fa21dfb7e8 +size 16821197 diff --git a/checkpoint-25000/finetuning_args.json b/checkpoint-25000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-25000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-25000/optimizer.pt b/checkpoint-25000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..84c2b2c01dab85f528f7aeecf19c9e6ad6ddfad3 --- /dev/null +++ b/checkpoint-25000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:072d6ef9f44bc0c11291e3a82578d3278e5029858327a6564ae9deb9012281f8 +size 33661637 diff --git a/checkpoint-25000/rng_state.pth b/checkpoint-25000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7cc3cbe16775ff2a250770da93c564ac6ed30792 --- /dev/null +++ b/checkpoint-25000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7a3ace5d247e1970a81c68a201e02f207baf2ba350ebe2b8508c528e867539ce +size 18663 diff --git a/checkpoint-25000/scheduler.pt b/checkpoint-25000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..f1a078554f1c1c374b79bd2e38d9a4b8cc4bc45d --- /dev/null +++ b/checkpoint-25000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f1de760acc39abe4c6a2decf0af384d05491514388866861a10f6ec2f6c35c2f +size 627 diff --git a/checkpoint-25000/trainer_state.json b/checkpoint-25000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..8c5962964a5be2807756c5b989275639ee28ab90 --- /dev/null +++ b/checkpoint-25000/trainer_state.json @@ -0,0 +1,15016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.7695669869582913, + "global_step": 25000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + }, + { + "epoch": 1.49, + "learning_rate": 7.692689245053728e-06, + "loss": 0.6843, + "step": 21010 + }, + { + "epoch": 1.49, + "learning_rate": 7.672640603614179e-06, + "loss": 0.7116, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 7.652613384775791e-06, + "loss": 0.7229, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 7.632607613299142e-06, + "loss": 0.7032, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 7.612623313918263e-06, + "loss": 0.7184, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 7.592660511340641e-06, + "loss": 0.7004, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 7.572719230247205e-06, + "loss": 0.7081, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 7.552799495292273e-06, + "loss": 0.6928, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 7.532901331103512e-06, + "loss": 0.686, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 7.513024762281914e-06, + "loss": 0.7178, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 7.493169813401799e-06, + "loss": 0.6919, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 7.473336509010742e-06, + "loss": 0.7132, + "step": 21120 + }, + { + "epoch": 1.5, + "learning_rate": 7.453524873629553e-06, + "loss": 0.7174, + "step": 21130 + }, + { + "epoch": 1.5, + "learning_rate": 7.4337349317522485e-06, + "loss": 0.7243, + "step": 21140 + }, + { + "epoch": 1.5, + "learning_rate": 7.41396670784604e-06, + "loss": 0.7158, + "step": 21150 + }, + { + "epoch": 1.5, + "learning_rate": 7.394220226351286e-06, + "loss": 0.7116, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 7.374495511681454e-06, + "loss": 0.6906, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 7.354792588223094e-06, + "loss": 0.6896, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 7.3351114803358354e-06, + "loss": 0.7078, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 7.3154522123523305e-06, + "loss": 0.7297, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 7.295814808578216e-06, + "loss": 0.6861, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 7.276199293292102e-06, + "loss": 0.6985, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 7.256605690745547e-06, + "loss": 0.7065, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 7.237034025163017e-06, + "loss": 0.7173, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 7.217484320741838e-06, + "loss": 0.7191, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 7.197956601652212e-06, + "loss": 0.7349, + "step": 21260 + }, + { + "epoch": 1.51, + "learning_rate": 7.178450892037128e-06, + "loss": 0.6995, + "step": 21270 + }, + { + "epoch": 1.51, + "learning_rate": 7.158967216012396e-06, + "loss": 0.7089, + "step": 21280 + }, + { + "epoch": 1.51, + "learning_rate": 7.139505597666557e-06, + "loss": 0.6755, + "step": 21290 + }, + { + "epoch": 1.51, + "learning_rate": 7.120066061060906e-06, + "loss": 0.6743, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 7.100648630229412e-06, + "loss": 0.7079, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 7.081253329178727e-06, + "loss": 0.7348, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 7.061880181888158e-06, + "loss": 0.7047, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 7.042529212309599e-06, + "loss": 0.7129, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 7.023200444367517e-06, + "loss": 0.6997, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 7.0038939019589605e-06, + "loss": 0.731, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 6.984609608953488e-06, + "loss": 0.7097, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 6.965347589193141e-06, + "loss": 0.7155, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 6.9461078664924216e-06, + "loss": 0.7037, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 6.926890464638277e-06, + "loss": 0.7201, + "step": 21400 + }, + { + "epoch": 1.52, + "learning_rate": 6.907695407390066e-06, + "loss": 0.7316, + "step": 21410 + }, + { + "epoch": 1.52, + "learning_rate": 6.888522718479498e-06, + "loss": 0.7124, + "step": 21420 + }, + { + "epoch": 1.52, + "learning_rate": 6.869372421610632e-06, + "loss": 0.7253, + "step": 21430 + }, + { + "epoch": 1.52, + "learning_rate": 6.85024454045986e-06, + "loss": 0.7065, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 6.831139098675854e-06, + "loss": 0.7073, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 6.812056119879534e-06, + "loss": 0.686, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 6.792995627664042e-06, + "loss": 0.6915, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 6.773957645594742e-06, + "loss": 0.7059, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 6.754942197209163e-06, + "loss": 0.7029, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 6.7359493060169475e-06, + "loss": 0.7351, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 6.716978995499887e-06, + "loss": 0.7193, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 6.698031289111825e-06, + "loss": 0.6966, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 6.679106210278682e-06, + "loss": 0.7117, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 6.660203782398383e-06, + "loss": 0.7054, + "step": 21540 + }, + { + "epoch": 1.53, + "learning_rate": 6.641324028840865e-06, + "loss": 0.712, + "step": 21550 + }, + { + "epoch": 1.53, + "learning_rate": 6.622466972948016e-06, + "loss": 0.7014, + "step": 21560 + }, + { + "epoch": 1.53, + "learning_rate": 6.603632638033683e-06, + "loss": 0.7101, + "step": 21570 + }, + { + "epoch": 1.53, + "learning_rate": 6.584821047383594e-06, + "loss": 0.7027, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 6.566032224255389e-06, + "loss": 0.7388, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 6.547266191878529e-06, + "loss": 0.6844, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 6.528522973454315e-06, + "loss": 0.6999, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 6.509802592155851e-06, + "loss": 0.7233, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 6.491105071127984e-06, + "loss": 0.6955, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 6.4724304334873e-06, + "loss": 0.7329, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 6.453778702322114e-06, + "loss": 0.7384, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 6.435149900692411e-06, + "loss": 0.6645, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 6.416544051629819e-06, + "loss": 0.7142, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 6.397961178137584e-06, + "loss": 0.7009, + "step": 21680 + }, + { + "epoch": 1.54, + "learning_rate": 6.3794013031905685e-06, + "loss": 0.6876, + "step": 21690 + }, + { + "epoch": 1.54, + "learning_rate": 6.36086444973519e-06, + "loss": 0.7037, + "step": 21700 + }, + { + "epoch": 1.54, + "learning_rate": 6.342350640689393e-06, + "loss": 0.7337, + "step": 21710 + }, + { + "epoch": 1.54, + "learning_rate": 6.323859898942649e-06, + "loss": 0.7101, + "step": 21720 + }, + { + "epoch": 1.54, + "learning_rate": 6.305392247355893e-06, + "loss": 0.7238, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 6.2869477087615315e-06, + "loss": 0.7183, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 6.268526305963374e-06, + "loss": 0.6999, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 6.250128061736646e-06, + "loss": 0.697, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 6.231752998827925e-06, + "loss": 0.7193, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 6.213401139955144e-06, + "loss": 0.7374, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 6.195072507807529e-06, + "loss": 0.7121, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 6.17676712504561e-06, + "loss": 0.6946, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 6.1584850143011546e-06, + "loss": 0.7179, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 6.140226198177176e-06, + "loss": 0.6801, + "step": 21820 + }, + { + "epoch": 1.55, + "learning_rate": 6.121990699247865e-06, + "loss": 0.7136, + "step": 21830 + }, + { + "epoch": 1.55, + "learning_rate": 6.103778540058611e-06, + "loss": 0.7195, + "step": 21840 + }, + { + "epoch": 1.55, + "learning_rate": 6.085589743125919e-06, + "loss": 0.683, + "step": 21850 + }, + { + "epoch": 1.55, + "learning_rate": 6.067424330937438e-06, + "loss": 0.7171, + "step": 21860 + }, + { + "epoch": 1.55, + "learning_rate": 6.0492823259518795e-06, + "loss": 0.7437, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 6.0311637505990394e-06, + "loss": 0.6891, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 6.013068627279725e-06, + "loss": 0.7259, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.994996978365763e-06, + "loss": 0.7382, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.97694882619996e-06, + "loss": 0.7512, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.9589241930960635e-06, + "loss": 0.7028, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.940923101338733e-06, + "loss": 0.7125, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.922945573183544e-06, + "loss": 0.707, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.90499163085694e-06, + "loss": 0.706, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.887061296556179e-06, + "loss": 0.7613, + "step": 21960 + }, + { + "epoch": 1.56, + "learning_rate": 5.869154592449364e-06, + "loss": 0.751, + "step": 21970 + }, + { + "epoch": 1.56, + "learning_rate": 5.8512715406753486e-06, + "loss": 0.7164, + "step": 21980 + }, + { + "epoch": 1.56, + "learning_rate": 5.8334121633437794e-06, + "loss": 0.7117, + "step": 21990 + }, + { + "epoch": 1.56, + "learning_rate": 5.815576482534999e-06, + "loss": 0.7227, + "step": 22000 + }, + { + "epoch": 1.56, + "learning_rate": 5.797764520300083e-06, + "loss": 0.687, + "step": 22010 + }, + { + "epoch": 1.56, + "learning_rate": 5.7799762986607585e-06, + "loss": 0.6959, + "step": 22020 + }, + { + "epoch": 1.56, + "learning_rate": 5.762211839609424e-06, + "loss": 0.6949, + "step": 22030 + }, + { + "epoch": 1.56, + "learning_rate": 5.744471165109069e-06, + "loss": 0.7237, + "step": 22040 + }, + { + "epoch": 1.56, + "learning_rate": 5.726754297093315e-06, + "loss": 0.718, + "step": 22050 + }, + { + "epoch": 1.56, + "learning_rate": 5.709061257466314e-06, + "loss": 0.7166, + "step": 22060 + }, + { + "epoch": 1.56, + "learning_rate": 5.691392068102786e-06, + "loss": 0.6881, + "step": 22070 + }, + { + "epoch": 1.56, + "learning_rate": 5.673746750847938e-06, + "loss": 0.7015, + "step": 22080 + }, + { + "epoch": 1.56, + "learning_rate": 5.656125327517495e-06, + "loss": 0.7148, + "step": 22090 + }, + { + "epoch": 1.56, + "learning_rate": 5.638527819897607e-06, + "loss": 0.7374, + "step": 22100 + }, + { + "epoch": 1.57, + "learning_rate": 5.620954249744884e-06, + "loss": 0.6898, + "step": 22110 + }, + { + "epoch": 1.57, + "learning_rate": 5.6034046387863165e-06, + "loss": 0.7184, + "step": 22120 + }, + { + "epoch": 1.57, + "learning_rate": 5.585879008719297e-06, + "loss": 0.7096, + "step": 22130 + }, + { + "epoch": 1.57, + "learning_rate": 5.568377381211548e-06, + "loss": 0.6917, + "step": 22140 + }, + { + "epoch": 1.57, + "learning_rate": 5.550899777901136e-06, + "loss": 0.7112, + "step": 22150 + }, + { + "epoch": 1.57, + "learning_rate": 5.533446220396404e-06, + "loss": 0.7252, + "step": 22160 + }, + { + "epoch": 1.57, + "learning_rate": 5.5160167302759884e-06, + "loss": 0.664, + "step": 22170 + }, + { + "epoch": 1.57, + "learning_rate": 5.498611329088751e-06, + "loss": 0.7099, + "step": 22180 + }, + { + "epoch": 1.57, + "learning_rate": 5.481230038353782e-06, + "loss": 0.7, + "step": 22190 + }, + { + "epoch": 1.57, + "learning_rate": 5.463872879560366e-06, + "loss": 0.7235, + "step": 22200 + }, + { + "epoch": 1.57, + "learning_rate": 5.4465398741679386e-06, + "loss": 0.6844, + "step": 22210 + }, + { + "epoch": 1.57, + "learning_rate": 5.42923104360609e-06, + "loss": 0.7504, + "step": 22220 + }, + { + "epoch": 1.57, + "learning_rate": 5.411946409274501e-06, + "loss": 0.6676, + "step": 22230 + }, + { + "epoch": 1.57, + "learning_rate": 5.394685992542964e-06, + "loss": 0.7014, + "step": 22240 + }, + { + "epoch": 1.57, + "learning_rate": 5.377449814751304e-06, + "loss": 0.7109, + "step": 22250 + }, + { + "epoch": 1.58, + "learning_rate": 5.3602378972094e-06, + "loss": 0.7328, + "step": 22260 + }, + { + "epoch": 1.58, + "learning_rate": 5.343050261197116e-06, + "loss": 0.6915, + "step": 22270 + }, + { + "epoch": 1.58, + "learning_rate": 5.325886927964319e-06, + "loss": 0.6845, + "step": 22280 + }, + { + "epoch": 1.58, + "learning_rate": 5.308747918730806e-06, + "loss": 0.7038, + "step": 22290 + }, + { + "epoch": 1.58, + "learning_rate": 5.29163325468632e-06, + "loss": 0.6908, + "step": 22300 + }, + { + "epoch": 1.58, + "learning_rate": 5.274542956990491e-06, + "loss": 0.7001, + "step": 22310 + }, + { + "epoch": 1.58, + "learning_rate": 5.257477046772844e-06, + "loss": 0.7159, + "step": 22320 + }, + { + "epoch": 1.58, + "learning_rate": 5.240435545132716e-06, + "loss": 0.705, + "step": 22330 + }, + { + "epoch": 1.58, + "learning_rate": 5.22341847313931e-06, + "loss": 0.6825, + "step": 22340 + }, + { + "epoch": 1.58, + "learning_rate": 5.206425851831592e-06, + "loss": 0.7245, + "step": 22350 + }, + { + "epoch": 1.58, + "learning_rate": 5.18945770221832e-06, + "loss": 0.7323, + "step": 22360 + }, + { + "epoch": 1.58, + "learning_rate": 5.172514045277979e-06, + "loss": 0.7015, + "step": 22370 + }, + { + "epoch": 1.58, + "learning_rate": 5.155594901958791e-06, + "loss": 0.7121, + "step": 22380 + }, + { + "epoch": 1.58, + "learning_rate": 5.13870029317865e-06, + "loss": 0.7172, + "step": 22390 + }, + { + "epoch": 1.59, + "learning_rate": 5.12183023982514e-06, + "loss": 0.7217, + "step": 22400 + }, + { + "epoch": 1.59, + "learning_rate": 5.1049847627554634e-06, + "loss": 0.6898, + "step": 22410 + }, + { + "epoch": 1.59, + "learning_rate": 5.088163882796448e-06, + "loss": 0.699, + "step": 22420 + }, + { + "epoch": 1.59, + "learning_rate": 5.071367620744527e-06, + "loss": 0.7336, + "step": 22430 + }, + { + "epoch": 1.59, + "learning_rate": 5.054595997365671e-06, + "loss": 0.7309, + "step": 22440 + }, + { + "epoch": 1.59, + "learning_rate": 5.037849033395392e-06, + "loss": 0.6978, + "step": 22450 + }, + { + "epoch": 1.59, + "learning_rate": 5.0211267495387295e-06, + "loss": 0.7039, + "step": 22460 + }, + { + "epoch": 1.59, + "learning_rate": 5.004429166470209e-06, + "loss": 0.7153, + "step": 22470 + }, + { + "epoch": 1.59, + "learning_rate": 4.987756304833796e-06, + "loss": 0.6851, + "step": 22480 + }, + { + "epoch": 1.59, + "learning_rate": 4.972771883223115e-06, + "loss": 0.7255, + "step": 22490 + }, + { + "epoch": 1.59, + "learning_rate": 4.956146049072402e-06, + "loss": 0.7188, + "step": 22500 + }, + { + "epoch": 1.59, + "learning_rate": 4.939544996048415e-06, + "loss": 0.7236, + "step": 22510 + }, + { + "epoch": 1.59, + "learning_rate": 4.922968744675788e-06, + "loss": 0.7312, + "step": 22520 + }, + { + "epoch": 1.59, + "learning_rate": 4.9064173154485086e-06, + "loss": 0.7279, + "step": 22530 + }, + { + "epoch": 1.6, + "learning_rate": 4.889890728829832e-06, + "loss": 0.6995, + "step": 22540 + }, + { + "epoch": 1.6, + "learning_rate": 4.8733890052523434e-06, + "loss": 0.7013, + "step": 22550 + }, + { + "epoch": 1.6, + "learning_rate": 4.856912165117871e-06, + "loss": 0.6899, + "step": 22560 + }, + { + "epoch": 1.6, + "learning_rate": 4.840460228797489e-06, + "loss": 0.698, + "step": 22570 + }, + { + "epoch": 1.6, + "learning_rate": 4.824033216631463e-06, + "loss": 0.7089, + "step": 22580 + }, + { + "epoch": 1.6, + "learning_rate": 4.807631148929248e-06, + "loss": 0.718, + "step": 22590 + }, + { + "epoch": 1.6, + "learning_rate": 4.791254045969476e-06, + "loss": 0.7047, + "step": 22600 + }, + { + "epoch": 1.6, + "learning_rate": 4.774901927999906e-06, + "loss": 0.7076, + "step": 22610 + }, + { + "epoch": 1.6, + "learning_rate": 4.758574815237396e-06, + "loss": 0.7187, + "step": 22620 + }, + { + "epoch": 1.6, + "learning_rate": 4.742272727867894e-06, + "loss": 0.7161, + "step": 22630 + }, + { + "epoch": 1.6, + "learning_rate": 4.7259956860464165e-06, + "loss": 0.7227, + "step": 22640 + }, + { + "epoch": 1.6, + "learning_rate": 4.711367778983819e-06, + "loss": 0.7202, + "step": 22650 + }, + { + "epoch": 1.6, + "learning_rate": 4.695138379119721e-06, + "loss": 0.7038, + "step": 22660 + }, + { + "epoch": 1.6, + "learning_rate": 4.678934083077979e-06, + "loss": 0.7102, + "step": 22670 + }, + { + "epoch": 1.61, + "learning_rate": 4.662754910892711e-06, + "loss": 0.6974, + "step": 22680 + }, + { + "epoch": 1.61, + "learning_rate": 4.646600882566954e-06, + "loss": 0.6962, + "step": 22690 + }, + { + "epoch": 1.61, + "learning_rate": 4.630472018072659e-06, + "loss": 0.6789, + "step": 22700 + }, + { + "epoch": 1.61, + "learning_rate": 4.614368337350686e-06, + "loss": 0.7192, + "step": 22710 + }, + { + "epoch": 1.61, + "learning_rate": 4.598289860310745e-06, + "loss": 0.6817, + "step": 22720 + }, + { + "epoch": 1.61, + "learning_rate": 4.582236606831378e-06, + "loss": 0.7246, + "step": 22730 + }, + { + "epoch": 1.61, + "learning_rate": 4.566208596759963e-06, + "loss": 0.7084, + "step": 22740 + }, + { + "epoch": 1.61, + "learning_rate": 4.550205849912648e-06, + "loss": 0.691, + "step": 22750 + }, + { + "epoch": 1.61, + "learning_rate": 4.534228386074363e-06, + "loss": 0.7319, + "step": 22760 + }, + { + "epoch": 1.61, + "learning_rate": 4.51827622499876e-06, + "loss": 0.7048, + "step": 22770 + }, + { + "epoch": 1.61, + "learning_rate": 4.502349386408236e-06, + "loss": 0.7237, + "step": 22780 + }, + { + "epoch": 1.61, + "learning_rate": 4.48644788999385e-06, + "loss": 0.6948, + "step": 22790 + }, + { + "epoch": 1.61, + "learning_rate": 4.470571755415354e-06, + "loss": 0.7186, + "step": 22800 + }, + { + "epoch": 1.61, + "learning_rate": 4.454721002301127e-06, + "loss": 0.7407, + "step": 22810 + }, + { + "epoch": 1.62, + "learning_rate": 4.438895650248184e-06, + "loss": 0.7064, + "step": 22820 + }, + { + "epoch": 1.62, + "learning_rate": 4.423095718822112e-06, + "loss": 0.6924, + "step": 22830 + }, + { + "epoch": 1.62, + "learning_rate": 4.4073212275570954e-06, + "loss": 0.7243, + "step": 22840 + }, + { + "epoch": 1.62, + "learning_rate": 4.3915721959558534e-06, + "loss": 0.7193, + "step": 22850 + }, + { + "epoch": 1.62, + "learning_rate": 4.37584864348963e-06, + "loss": 0.7117, + "step": 22860 + }, + { + "epoch": 1.62, + "learning_rate": 4.360150589598156e-06, + "loss": 0.692, + "step": 22870 + }, + { + "epoch": 1.62, + "learning_rate": 4.344478053689652e-06, + "loss": 0.7245, + "step": 22880 + }, + { + "epoch": 1.62, + "learning_rate": 4.328831055140798e-06, + "loss": 0.7022, + "step": 22890 + }, + { + "epoch": 1.62, + "learning_rate": 4.313209613296679e-06, + "loss": 0.7265, + "step": 22900 + }, + { + "epoch": 1.62, + "learning_rate": 4.297613747470789e-06, + "loss": 0.7039, + "step": 22910 + }, + { + "epoch": 1.62, + "learning_rate": 4.282043476945008e-06, + "loss": 0.6811, + "step": 22920 + }, + { + "epoch": 1.62, + "learning_rate": 4.2664988209695775e-06, + "loss": 0.6649, + "step": 22930 + }, + { + "epoch": 1.62, + "learning_rate": 4.250979798763052e-06, + "loss": 0.6998, + "step": 22940 + }, + { + "epoch": 1.62, + "learning_rate": 4.2354864295123e-06, + "loss": 0.7433, + "step": 22950 + }, + { + "epoch": 1.63, + "learning_rate": 4.220018732372485e-06, + "loss": 0.7184, + "step": 22960 + }, + { + "epoch": 1.63, + "learning_rate": 4.204576726467027e-06, + "loss": 0.7101, + "step": 22970 + }, + { + "epoch": 1.63, + "learning_rate": 4.1891604308875706e-06, + "loss": 0.7722, + "step": 22980 + }, + { + "epoch": 1.63, + "learning_rate": 4.17376986469398e-06, + "loss": 0.7269, + "step": 22990 + }, + { + "epoch": 1.63, + "learning_rate": 4.158405046914315e-06, + "loss": 0.6903, + "step": 23000 + }, + { + "epoch": 1.63, + "learning_rate": 4.143065996544804e-06, + "loss": 0.7359, + "step": 23010 + }, + { + "epoch": 1.63, + "learning_rate": 4.1277527325498e-06, + "loss": 0.6894, + "step": 23020 + }, + { + "epoch": 1.63, + "learning_rate": 4.112465273861799e-06, + "loss": 0.7237, + "step": 23030 + }, + { + "epoch": 1.63, + "learning_rate": 4.097203639381364e-06, + "loss": 0.7028, + "step": 23040 + }, + { + "epoch": 1.63, + "learning_rate": 4.081967847977164e-06, + "loss": 0.7038, + "step": 23050 + }, + { + "epoch": 1.63, + "learning_rate": 4.066757918485886e-06, + "loss": 0.711, + "step": 23060 + }, + { + "epoch": 1.63, + "learning_rate": 4.0515738697122694e-06, + "loss": 0.717, + "step": 23070 + }, + { + "epoch": 1.63, + "learning_rate": 4.036415720429027e-06, + "loss": 0.7134, + "step": 23080 + }, + { + "epoch": 1.63, + "learning_rate": 4.021283489376876e-06, + "loss": 0.709, + "step": 23090 + }, + { + "epoch": 1.64, + "learning_rate": 4.006177195264488e-06, + "loss": 0.7266, + "step": 23100 + }, + { + "epoch": 1.64, + "learning_rate": 3.9910968567684506e-06, + "loss": 0.6872, + "step": 23110 + }, + { + "epoch": 1.64, + "learning_rate": 3.976042492533269e-06, + "loss": 0.7256, + "step": 23120 + }, + { + "epoch": 1.64, + "learning_rate": 3.961014121171342e-06, + "loss": 0.7437, + "step": 23130 + }, + { + "epoch": 1.64, + "learning_rate": 3.946011761262932e-06, + "loss": 0.7111, + "step": 23140 + }, + { + "epoch": 1.64, + "learning_rate": 3.931035431356134e-06, + "loss": 0.697, + "step": 23150 + }, + { + "epoch": 1.64, + "learning_rate": 3.916085149966856e-06, + "loss": 0.7258, + "step": 23160 + }, + { + "epoch": 1.64, + "learning_rate": 3.901160935578815e-06, + "loss": 0.7029, + "step": 23170 + }, + { + "epoch": 1.64, + "learning_rate": 3.8862628066435065e-06, + "loss": 0.686, + "step": 23180 + }, + { + "epoch": 1.64, + "learning_rate": 3.8713907815801534e-06, + "loss": 0.6994, + "step": 23190 + }, + { + "epoch": 1.64, + "learning_rate": 3.856544878775708e-06, + "loss": 0.7039, + "step": 23200 + }, + { + "epoch": 1.64, + "learning_rate": 3.841725116584846e-06, + "loss": 0.7096, + "step": 23210 + }, + { + "epoch": 1.64, + "learning_rate": 3.8269315133299126e-06, + "loss": 0.7029, + "step": 23220 + }, + { + "epoch": 1.64, + "learning_rate": 3.8121640873009067e-06, + "loss": 0.7133, + "step": 23230 + }, + { + "epoch": 1.64, + "learning_rate": 3.7974228567554617e-06, + "loss": 0.7054, + "step": 23240 + }, + { + "epoch": 1.65, + "learning_rate": 3.7827078399188393e-06, + "loss": 0.7077, + "step": 23250 + }, + { + "epoch": 1.65, + "learning_rate": 3.7680190549838847e-06, + "loss": 0.6985, + "step": 23260 + }, + { + "epoch": 1.65, + "learning_rate": 3.753356520111004e-06, + "loss": 0.7222, + "step": 23270 + }, + { + "epoch": 1.65, + "learning_rate": 3.738720253428152e-06, + "loss": 0.7174, + "step": 23280 + }, + { + "epoch": 1.65, + "learning_rate": 3.724110273030812e-06, + "loss": 0.6935, + "step": 23290 + }, + { + "epoch": 1.65, + "learning_rate": 3.709526596981977e-06, + "loss": 0.7094, + "step": 23300 + }, + { + "epoch": 1.65, + "learning_rate": 3.6949692433120907e-06, + "loss": 0.7029, + "step": 23310 + }, + { + "epoch": 1.65, + "learning_rate": 3.6804382300190844e-06, + "loss": 0.7145, + "step": 23320 + }, + { + "epoch": 1.65, + "learning_rate": 3.665933575068298e-06, + "loss": 0.7247, + "step": 23330 + }, + { + "epoch": 1.65, + "learning_rate": 3.6514552963925004e-06, + "loss": 0.7393, + "step": 23340 + }, + { + "epoch": 1.65, + "learning_rate": 3.637003411891854e-06, + "loss": 0.7352, + "step": 23350 + }, + { + "epoch": 1.65, + "learning_rate": 3.622577939433866e-06, + "loss": 0.6873, + "step": 23360 + }, + { + "epoch": 1.65, + "learning_rate": 3.6081788968534066e-06, + "loss": 0.7056, + "step": 23370 + }, + { + "epoch": 1.65, + "learning_rate": 3.5938063019526653e-06, + "loss": 0.7287, + "step": 23380 + }, + { + "epoch": 1.66, + "learning_rate": 3.579460172501142e-06, + "loss": 0.717, + "step": 23390 + }, + { + "epoch": 1.66, + "learning_rate": 3.5651405262356024e-06, + "loss": 0.7258, + "step": 23400 + }, + { + "epoch": 1.66, + "learning_rate": 3.5508473808600674e-06, + "loss": 0.6985, + "step": 23410 + }, + { + "epoch": 1.66, + "learning_rate": 3.5365807540458097e-06, + "loss": 0.7059, + "step": 23420 + }, + { + "epoch": 1.66, + "learning_rate": 3.522340663431314e-06, + "loss": 0.7047, + "step": 23430 + }, + { + "epoch": 1.66, + "learning_rate": 3.5081271266222434e-06, + "loss": 0.7064, + "step": 23440 + }, + { + "epoch": 1.66, + "learning_rate": 3.4939401611914337e-06, + "loss": 0.6804, + "step": 23450 + }, + { + "epoch": 1.66, + "learning_rate": 3.479779784678877e-06, + "loss": 0.7099, + "step": 23460 + }, + { + "epoch": 1.66, + "learning_rate": 3.465646014591703e-06, + "loss": 0.7182, + "step": 23470 + }, + { + "epoch": 1.66, + "learning_rate": 3.4515388684041193e-06, + "loss": 0.6964, + "step": 23480 + }, + { + "epoch": 1.66, + "learning_rate": 3.437458363557433e-06, + "loss": 0.707, + "step": 23490 + }, + { + "epoch": 1.66, + "learning_rate": 3.4234045174600103e-06, + "loss": 0.6729, + "step": 23500 + }, + { + "epoch": 1.66, + "learning_rate": 3.409377347487272e-06, + "loss": 0.6822, + "step": 23510 + }, + { + "epoch": 1.66, + "learning_rate": 3.395376870981634e-06, + "loss": 0.7339, + "step": 23520 + }, + { + "epoch": 1.67, + "learning_rate": 3.3814031052525175e-06, + "loss": 0.7011, + "step": 23530 + }, + { + "epoch": 1.67, + "learning_rate": 3.367456067576327e-06, + "loss": 0.7216, + "step": 23540 + }, + { + "epoch": 1.67, + "learning_rate": 3.353535775196423e-06, + "loss": 0.7194, + "step": 23550 + }, + { + "epoch": 1.67, + "learning_rate": 3.339642245323102e-06, + "loss": 0.7163, + "step": 23560 + }, + { + "epoch": 1.67, + "learning_rate": 3.325775495133546e-06, + "loss": 0.736, + "step": 23570 + }, + { + "epoch": 1.67, + "learning_rate": 3.31193554177186e-06, + "loss": 0.7071, + "step": 23580 + }, + { + "epoch": 1.67, + "learning_rate": 3.298122402349002e-06, + "loss": 0.6889, + "step": 23590 + }, + { + "epoch": 1.67, + "learning_rate": 3.2843360939427943e-06, + "loss": 0.6933, + "step": 23600 + }, + { + "epoch": 1.67, + "learning_rate": 3.270576633597866e-06, + "loss": 0.699, + "step": 23610 + }, + { + "epoch": 1.67, + "learning_rate": 3.2568440383256598e-06, + "loss": 0.6603, + "step": 23620 + }, + { + "epoch": 1.67, + "learning_rate": 3.243138325104411e-06, + "loss": 0.6971, + "step": 23630 + }, + { + "epoch": 1.67, + "learning_rate": 3.230826181364585e-06, + "loss": 0.7269, + "step": 23640 + }, + { + "epoch": 1.67, + "learning_rate": 3.217171590696141e-06, + "loss": 0.7277, + "step": 23650 + }, + { + "epoch": 1.67, + "learning_rate": 3.2035439311275006e-06, + "loss": 0.6943, + "step": 23660 + }, + { + "epoch": 1.68, + "learning_rate": 3.1899432195071575e-06, + "loss": 0.7204, + "step": 23670 + }, + { + "epoch": 1.68, + "learning_rate": 3.176369472650292e-06, + "loss": 0.7241, + "step": 23680 + }, + { + "epoch": 1.68, + "learning_rate": 3.1628227073387474e-06, + "loss": 0.6712, + "step": 23690 + }, + { + "epoch": 1.68, + "learning_rate": 3.1493029403209973e-06, + "loss": 0.6877, + "step": 23700 + }, + { + "epoch": 1.68, + "learning_rate": 3.1358101883121547e-06, + "loss": 0.6953, + "step": 23710 + }, + { + "epoch": 1.68, + "learning_rate": 3.122344467993907e-06, + "loss": 0.6918, + "step": 23720 + }, + { + "epoch": 1.68, + "learning_rate": 3.1089057960145498e-06, + "loss": 0.6866, + "step": 23730 + }, + { + "epoch": 1.68, + "learning_rate": 3.0954941889889096e-06, + "loss": 0.6975, + "step": 23740 + }, + { + "epoch": 1.68, + "learning_rate": 3.082109663498378e-06, + "loss": 0.7213, + "step": 23750 + }, + { + "epoch": 1.68, + "learning_rate": 3.068752236090841e-06, + "loss": 0.7225, + "step": 23760 + }, + { + "epoch": 1.68, + "learning_rate": 3.055421923280702e-06, + "loss": 0.7064, + "step": 23770 + }, + { + "epoch": 1.68, + "learning_rate": 3.0421187415488246e-06, + "loss": 0.696, + "step": 23780 + }, + { + "epoch": 1.68, + "learning_rate": 3.028842707342541e-06, + "loss": 0.7251, + "step": 23790 + }, + { + "epoch": 1.68, + "learning_rate": 3.0155938370756116e-06, + "loss": 0.7075, + "step": 23800 + }, + { + "epoch": 1.69, + "learning_rate": 3.0023721471282214e-06, + "loss": 0.7181, + "step": 23810 + }, + { + "epoch": 1.69, + "learning_rate": 2.9891776538469362e-06, + "loss": 0.6982, + "step": 23820 + }, + { + "epoch": 1.69, + "learning_rate": 2.9760103735447186e-06, + "loss": 0.6984, + "step": 23830 + }, + { + "epoch": 1.69, + "learning_rate": 2.962870322500866e-06, + "loss": 0.7363, + "step": 23840 + }, + { + "epoch": 1.69, + "learning_rate": 2.9497575169610243e-06, + "loss": 0.6743, + "step": 23850 + }, + { + "epoch": 1.69, + "learning_rate": 2.9366719731371563e-06, + "loss": 0.7141, + "step": 23860 + }, + { + "epoch": 1.69, + "learning_rate": 2.9236137072075067e-06, + "loss": 0.7228, + "step": 23870 + }, + { + "epoch": 1.69, + "learning_rate": 2.910582735316597e-06, + "loss": 0.7028, + "step": 23880 + }, + { + "epoch": 1.69, + "learning_rate": 2.8975790735752186e-06, + "loss": 0.7098, + "step": 23890 + }, + { + "epoch": 1.69, + "learning_rate": 2.8846027380603908e-06, + "loss": 0.6907, + "step": 23900 + }, + { + "epoch": 1.69, + "learning_rate": 2.8716537448153364e-06, + "loss": 0.7226, + "step": 23910 + }, + { + "epoch": 1.69, + "learning_rate": 2.8587321098494963e-06, + "loss": 0.7298, + "step": 23920 + }, + { + "epoch": 1.69, + "learning_rate": 2.8458378491384606e-06, + "loss": 0.7172, + "step": 23930 + }, + { + "epoch": 1.69, + "learning_rate": 2.832970978624e-06, + "loss": 0.7065, + "step": 23940 + }, + { + "epoch": 1.7, + "learning_rate": 2.8201315142140055e-06, + "loss": 0.6787, + "step": 23950 + }, + { + "epoch": 1.7, + "learning_rate": 2.8073194717824935e-06, + "loss": 0.6846, + "step": 23960 + }, + { + "epoch": 1.7, + "learning_rate": 2.794534867169568e-06, + "loss": 0.7014, + "step": 23970 + }, + { + "epoch": 1.7, + "learning_rate": 2.7817777161814245e-06, + "loss": 0.721, + "step": 23980 + }, + { + "epoch": 1.7, + "learning_rate": 2.769048034590299e-06, + "loss": 0.7459, + "step": 23990 + }, + { + "epoch": 1.7, + "learning_rate": 2.7563458381344858e-06, + "loss": 0.6801, + "step": 24000 + }, + { + "epoch": 1.7, + "learning_rate": 2.7436711425182726e-06, + "loss": 0.7142, + "step": 24010 + }, + { + "epoch": 1.7, + "learning_rate": 2.7310239634119744e-06, + "loss": 0.6988, + "step": 24020 + }, + { + "epoch": 1.7, + "learning_rate": 2.718404316451864e-06, + "loss": 0.7297, + "step": 24030 + }, + { + "epoch": 1.7, + "learning_rate": 2.7058122172401916e-06, + "loss": 0.672, + "step": 24040 + }, + { + "epoch": 1.7, + "learning_rate": 2.693247681345132e-06, + "loss": 0.7278, + "step": 24050 + }, + { + "epoch": 1.7, + "learning_rate": 2.680710724300803e-06, + "loss": 0.7055, + "step": 24060 + }, + { + "epoch": 1.7, + "learning_rate": 2.6682013616072005e-06, + "loss": 0.7266, + "step": 24070 + }, + { + "epoch": 1.7, + "learning_rate": 2.655719608730231e-06, + "loss": 0.7121, + "step": 24080 + }, + { + "epoch": 1.71, + "learning_rate": 2.6432654811016395e-06, + "loss": 0.6943, + "step": 24090 + }, + { + "epoch": 1.71, + "learning_rate": 2.630838994119042e-06, + "loss": 0.709, + "step": 24100 + }, + { + "epoch": 1.71, + "learning_rate": 2.618440163145855e-06, + "loss": 0.7115, + "step": 24110 + }, + { + "epoch": 1.71, + "learning_rate": 2.60606900351133e-06, + "loss": 0.7056, + "step": 24120 + }, + { + "epoch": 1.71, + "learning_rate": 2.5937255305104825e-06, + "loss": 0.7071, + "step": 24130 + }, + { + "epoch": 1.71, + "learning_rate": 2.581409759404113e-06, + "loss": 0.6953, + "step": 24140 + }, + { + "epoch": 1.71, + "learning_rate": 2.5691217054187726e-06, + "loss": 0.7178, + "step": 24150 + }, + { + "epoch": 1.71, + "learning_rate": 2.556861383746731e-06, + "loss": 0.7034, + "step": 24160 + }, + { + "epoch": 1.71, + "learning_rate": 2.54462880954599e-06, + "loss": 0.7028, + "step": 24170 + }, + { + "epoch": 1.71, + "learning_rate": 2.532423997940231e-06, + "loss": 0.6898, + "step": 24180 + }, + { + "epoch": 1.71, + "learning_rate": 2.5202469640188187e-06, + "loss": 0.7106, + "step": 24190 + }, + { + "epoch": 1.71, + "learning_rate": 2.508097722836769e-06, + "loss": 0.7149, + "step": 24200 + }, + { + "epoch": 1.71, + "learning_rate": 2.4959762894147503e-06, + "loss": 0.7059, + "step": 24210 + }, + { + "epoch": 1.71, + "learning_rate": 2.48388267873903e-06, + "loss": 0.6858, + "step": 24220 + }, + { + "epoch": 1.72, + "learning_rate": 2.4718169057614953e-06, + "loss": 0.6949, + "step": 24230 + }, + { + "epoch": 1.72, + "learning_rate": 2.4597789853996022e-06, + "loss": 0.7374, + "step": 24240 + }, + { + "epoch": 1.72, + "learning_rate": 2.4477689325363875e-06, + "loss": 0.6925, + "step": 24250 + }, + { + "epoch": 1.72, + "learning_rate": 2.4357867620204174e-06, + "loss": 0.7254, + "step": 24260 + }, + { + "epoch": 1.72, + "learning_rate": 2.4238324886657976e-06, + "loss": 0.7108, + "step": 24270 + }, + { + "epoch": 1.72, + "learning_rate": 2.411906127252134e-06, + "loss": 0.7205, + "step": 24280 + }, + { + "epoch": 1.72, + "learning_rate": 2.4000076925245364e-06, + "loss": 0.7125, + "step": 24290 + }, + { + "epoch": 1.72, + "learning_rate": 2.388137199193571e-06, + "loss": 0.7166, + "step": 24300 + }, + { + "epoch": 1.72, + "learning_rate": 2.3762946619352773e-06, + "loss": 0.7536, + "step": 24310 + }, + { + "epoch": 1.72, + "learning_rate": 2.364480095391114e-06, + "loss": 0.6741, + "step": 24320 + }, + { + "epoch": 1.72, + "learning_rate": 2.3526935141679695e-06, + "loss": 0.7399, + "step": 24330 + }, + { + "epoch": 1.72, + "learning_rate": 2.3409349328381264e-06, + "loss": 0.6885, + "step": 24340 + }, + { + "epoch": 1.72, + "learning_rate": 2.329204365939261e-06, + "loss": 0.6654, + "step": 24350 + }, + { + "epoch": 1.72, + "learning_rate": 2.3175018279743943e-06, + "loss": 0.7019, + "step": 24360 + }, + { + "epoch": 1.72, + "learning_rate": 2.305827333411914e-06, + "loss": 0.6802, + "step": 24370 + }, + { + "epoch": 1.73, + "learning_rate": 2.2941808966855217e-06, + "loss": 0.6845, + "step": 24380 + }, + { + "epoch": 1.73, + "learning_rate": 2.2825625321942433e-06, + "loss": 0.7087, + "step": 24390 + }, + { + "epoch": 1.73, + "learning_rate": 2.270972254302381e-06, + "loss": 0.6803, + "step": 24400 + }, + { + "epoch": 1.73, + "learning_rate": 2.259410077339527e-06, + "loss": 0.7212, + "step": 24410 + }, + { + "epoch": 1.73, + "learning_rate": 2.2478760156005278e-06, + "loss": 0.7025, + "step": 24420 + }, + { + "epoch": 1.73, + "learning_rate": 2.2363700833454627e-06, + "loss": 0.6774, + "step": 24430 + }, + { + "epoch": 1.73, + "learning_rate": 2.2248922947996464e-06, + "loss": 0.7134, + "step": 24440 + }, + { + "epoch": 1.73, + "learning_rate": 2.2134426641535756e-06, + "loss": 0.71, + "step": 24450 + }, + { + "epoch": 1.73, + "learning_rate": 2.202021205562965e-06, + "loss": 0.7253, + "step": 24460 + }, + { + "epoch": 1.73, + "learning_rate": 2.19062793314867e-06, + "loss": 0.7008, + "step": 24470 + }, + { + "epoch": 1.73, + "learning_rate": 2.179262860996717e-06, + "loss": 0.7299, + "step": 24480 + }, + { + "epoch": 1.73, + "learning_rate": 2.1679260031582575e-06, + "loss": 0.7259, + "step": 24490 + }, + { + "epoch": 1.73, + "learning_rate": 2.1566173736495663e-06, + "loss": 0.6953, + "step": 24500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1453369864520125e-06, + "loss": 0.7024, + "step": 24510 + }, + { + "epoch": 1.74, + "learning_rate": 2.134084855512056e-06, + "loss": 0.7059, + "step": 24520 + }, + { + "epoch": 1.74, + "learning_rate": 2.1228609947412074e-06, + "loss": 0.6975, + "step": 24530 + }, + { + "epoch": 1.74, + "learning_rate": 2.111665418016051e-06, + "loss": 0.7137, + "step": 24540 + }, + { + "epoch": 1.74, + "learning_rate": 2.1004981391781697e-06, + "loss": 0.7219, + "step": 24550 + }, + { + "epoch": 1.74, + "learning_rate": 2.0893591720341888e-06, + "loss": 0.7267, + "step": 24560 + }, + { + "epoch": 1.74, + "learning_rate": 2.0782485303557144e-06, + "loss": 0.7114, + "step": 24570 + }, + { + "epoch": 1.74, + "learning_rate": 2.0671662278793387e-06, + "loss": 0.7075, + "step": 24580 + }, + { + "epoch": 1.74, + "learning_rate": 2.0561122783066128e-06, + "loss": 0.6794, + "step": 24590 + }, + { + "epoch": 1.74, + "learning_rate": 2.0450866953040436e-06, + "loss": 0.7195, + "step": 24600 + }, + { + "epoch": 1.74, + "learning_rate": 2.034089492503052e-06, + "loss": 0.7097, + "step": 24610 + }, + { + "epoch": 1.74, + "learning_rate": 2.0231206834999793e-06, + "loss": 0.7114, + "step": 24620 + }, + { + "epoch": 1.74, + "learning_rate": 2.0121802818560716e-06, + "loss": 0.7295, + "step": 24630 + }, + { + "epoch": 1.74, + "learning_rate": 2.0012683010974366e-06, + "loss": 0.7161, + "step": 24640 + }, + { + "epoch": 1.74, + "learning_rate": 1.9903847547150495e-06, + "loss": 0.6945, + "step": 24650 + }, + { + "epoch": 1.75, + "learning_rate": 1.979529656164736e-06, + "loss": 0.7025, + "step": 24660 + }, + { + "epoch": 1.75, + "learning_rate": 1.9687030188671517e-06, + "loss": 0.7047, + "step": 24670 + }, + { + "epoch": 1.75, + "learning_rate": 1.957904856207754e-06, + "loss": 0.6837, + "step": 24680 + }, + { + "epoch": 1.75, + "learning_rate": 1.9471351815367995e-06, + "loss": 0.7309, + "step": 24690 + }, + { + "epoch": 1.75, + "learning_rate": 1.9363940081693287e-06, + "loss": 0.7112, + "step": 24700 + }, + { + "epoch": 1.75, + "learning_rate": 1.92568134938515e-06, + "loss": 0.698, + "step": 24710 + }, + { + "epoch": 1.75, + "learning_rate": 1.914997218428796e-06, + "loss": 0.7365, + "step": 24720 + }, + { + "epoch": 1.75, + "learning_rate": 1.9043416285095568e-06, + "loss": 0.6988, + "step": 24730 + }, + { + "epoch": 1.75, + "learning_rate": 1.8937145928014073e-06, + "loss": 0.7022, + "step": 24740 + }, + { + "epoch": 1.75, + "learning_rate": 1.883116124443049e-06, + "loss": 0.7207, + "step": 24750 + }, + { + "epoch": 1.75, + "learning_rate": 1.8725462365378366e-06, + "loss": 0.7202, + "step": 24760 + }, + { + "epoch": 1.75, + "learning_rate": 1.862004942153811e-06, + "loss": 0.7263, + "step": 24770 + }, + { + "epoch": 1.75, + "learning_rate": 1.85149225432365e-06, + "loss": 0.7123, + "step": 24780 + }, + { + "epoch": 1.75, + "learning_rate": 1.8410081860446682e-06, + "loss": 0.7025, + "step": 24790 + }, + { + "epoch": 1.76, + "learning_rate": 1.8305527502787912e-06, + "loss": 0.7225, + "step": 24800 + }, + { + "epoch": 1.76, + "learning_rate": 1.8201259599525567e-06, + "loss": 0.6956, + "step": 24810 + }, + { + "epoch": 1.76, + "learning_rate": 1.8097278279570696e-06, + "loss": 0.716, + "step": 24820 + }, + { + "epoch": 1.76, + "learning_rate": 1.7993583671480213e-06, + "loss": 0.6895, + "step": 24830 + }, + { + "epoch": 1.76, + "learning_rate": 1.7890175903456403e-06, + "loss": 0.7089, + "step": 24840 + }, + { + "epoch": 1.76, + "learning_rate": 1.778705510334705e-06, + "loss": 0.7165, + "step": 24850 + }, + { + "epoch": 1.76, + "learning_rate": 1.7684221398645007e-06, + "loss": 0.6951, + "step": 24860 + }, + { + "epoch": 1.76, + "learning_rate": 1.7581674916488267e-06, + "loss": 0.6925, + "step": 24870 + }, + { + "epoch": 1.76, + "learning_rate": 1.7479415783659774e-06, + "loss": 0.724, + "step": 24880 + }, + { + "epoch": 1.76, + "learning_rate": 1.7377444126587094e-06, + "loss": 0.6585, + "step": 24890 + }, + { + "epoch": 1.76, + "learning_rate": 1.7275760071342379e-06, + "loss": 0.7301, + "step": 24900 + }, + { + "epoch": 1.76, + "learning_rate": 1.7174363743642257e-06, + "loss": 0.6777, + "step": 24910 + }, + { + "epoch": 1.76, + "learning_rate": 1.7073255268847676e-06, + "loss": 0.6788, + "step": 24920 + }, + { + "epoch": 1.76, + "learning_rate": 1.6972434771963586e-06, + "loss": 0.7013, + "step": 24930 + }, + { + "epoch": 1.77, + "learning_rate": 1.6871902377638915e-06, + "loss": 0.6847, + "step": 24940 + }, + { + "epoch": 1.77, + "learning_rate": 1.6771658210166436e-06, + "loss": 0.7232, + "step": 24950 + }, + { + "epoch": 1.77, + "learning_rate": 1.6671702393482646e-06, + "loss": 0.7024, + "step": 24960 + }, + { + "epoch": 1.77, + "learning_rate": 1.657203505116739e-06, + "loss": 0.674, + "step": 24970 + }, + { + "epoch": 1.77, + "learning_rate": 1.647265630644393e-06, + "loss": 0.7264, + "step": 24980 + }, + { + "epoch": 1.77, + "learning_rate": 1.6373566282178704e-06, + "loss": 0.703, + "step": 24990 + }, + { + "epoch": 1.77, + "learning_rate": 1.6274765100881272e-06, + "loss": 0.7121, + "step": 25000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 6.085120844504236e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-25000/training_args.bin b/checkpoint-25000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-25000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-26000/README.md b/checkpoint-26000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-26000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-26000/adapter_config.json b/checkpoint-26000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-26000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-26000/adapter_model.bin b/checkpoint-26000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff2e93ac00c625ccd832288e622ce6de4a0b9a8e --- /dev/null +++ b/checkpoint-26000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:da23e2d0e2138d2b2580e3cdad4192253969a4a0f5fd3a20e6038e3051d55505 +size 16821197 diff --git a/checkpoint-26000/finetuning_args.json b/checkpoint-26000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-26000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-26000/optimizer.pt b/checkpoint-26000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..34d795a654921f865fc61ad4352edb7f71e70673 --- /dev/null +++ b/checkpoint-26000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1c48ffa81dabd187e2e62b5d6a552cc22bd8e025a100ee717ee30a93884c13c +size 33661637 diff --git a/checkpoint-26000/rng_state.pth b/checkpoint-26000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..99bc809a28c049dfc47b12fab558c4a322ee08f2 --- /dev/null +++ b/checkpoint-26000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e2f77274515f33fef9a68619722dfe86813b61bfc0a6fb5840243eedf156ce8 +size 18663 diff --git a/checkpoint-26000/scheduler.pt b/checkpoint-26000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..6bf6c07f92bfd141eb6784766561b6458f1e3ded --- /dev/null +++ b/checkpoint-26000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5e45ff16eb9de87cfc8258a8da4c3da855b98d2c6c768d6398919a83ead4019c +size 627 diff --git a/checkpoint-26000/trainer_state.json b/checkpoint-26000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..9ad9473615f461f66a61ae7c2665ff6586d2e1e9 --- /dev/null +++ b/checkpoint-26000/trainer_state.json @@ -0,0 +1,15616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.840349666436623, + "global_step": 26000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + }, + { + "epoch": 1.49, + "learning_rate": 7.692689245053728e-06, + "loss": 0.6843, + "step": 21010 + }, + { + "epoch": 1.49, + "learning_rate": 7.672640603614179e-06, + "loss": 0.7116, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 7.652613384775791e-06, + "loss": 0.7229, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 7.632607613299142e-06, + "loss": 0.7032, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 7.612623313918263e-06, + "loss": 0.7184, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 7.592660511340641e-06, + "loss": 0.7004, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 7.572719230247205e-06, + "loss": 0.7081, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 7.552799495292273e-06, + "loss": 0.6928, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 7.532901331103512e-06, + "loss": 0.686, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 7.513024762281914e-06, + "loss": 0.7178, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 7.493169813401799e-06, + "loss": 0.6919, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 7.473336509010742e-06, + "loss": 0.7132, + "step": 21120 + }, + { + "epoch": 1.5, + "learning_rate": 7.453524873629553e-06, + "loss": 0.7174, + "step": 21130 + }, + { + "epoch": 1.5, + "learning_rate": 7.4337349317522485e-06, + "loss": 0.7243, + "step": 21140 + }, + { + "epoch": 1.5, + "learning_rate": 7.41396670784604e-06, + "loss": 0.7158, + "step": 21150 + }, + { + "epoch": 1.5, + "learning_rate": 7.394220226351286e-06, + "loss": 0.7116, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 7.374495511681454e-06, + "loss": 0.6906, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 7.354792588223094e-06, + "loss": 0.6896, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 7.3351114803358354e-06, + "loss": 0.7078, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 7.3154522123523305e-06, + "loss": 0.7297, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 7.295814808578216e-06, + "loss": 0.6861, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 7.276199293292102e-06, + "loss": 0.6985, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 7.256605690745547e-06, + "loss": 0.7065, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 7.237034025163017e-06, + "loss": 0.7173, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 7.217484320741838e-06, + "loss": 0.7191, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 7.197956601652212e-06, + "loss": 0.7349, + "step": 21260 + }, + { + "epoch": 1.51, + "learning_rate": 7.178450892037128e-06, + "loss": 0.6995, + "step": 21270 + }, + { + "epoch": 1.51, + "learning_rate": 7.158967216012396e-06, + "loss": 0.7089, + "step": 21280 + }, + { + "epoch": 1.51, + "learning_rate": 7.139505597666557e-06, + "loss": 0.6755, + "step": 21290 + }, + { + "epoch": 1.51, + "learning_rate": 7.120066061060906e-06, + "loss": 0.6743, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 7.100648630229412e-06, + "loss": 0.7079, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 7.081253329178727e-06, + "loss": 0.7348, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 7.061880181888158e-06, + "loss": 0.7047, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 7.042529212309599e-06, + "loss": 0.7129, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 7.023200444367517e-06, + "loss": 0.6997, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 7.0038939019589605e-06, + "loss": 0.731, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 6.984609608953488e-06, + "loss": 0.7097, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 6.965347589193141e-06, + "loss": 0.7155, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 6.9461078664924216e-06, + "loss": 0.7037, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 6.926890464638277e-06, + "loss": 0.7201, + "step": 21400 + }, + { + "epoch": 1.52, + "learning_rate": 6.907695407390066e-06, + "loss": 0.7316, + "step": 21410 + }, + { + "epoch": 1.52, + "learning_rate": 6.888522718479498e-06, + "loss": 0.7124, + "step": 21420 + }, + { + "epoch": 1.52, + "learning_rate": 6.869372421610632e-06, + "loss": 0.7253, + "step": 21430 + }, + { + "epoch": 1.52, + "learning_rate": 6.85024454045986e-06, + "loss": 0.7065, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 6.831139098675854e-06, + "loss": 0.7073, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 6.812056119879534e-06, + "loss": 0.686, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 6.792995627664042e-06, + "loss": 0.6915, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 6.773957645594742e-06, + "loss": 0.7059, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 6.754942197209163e-06, + "loss": 0.7029, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 6.7359493060169475e-06, + "loss": 0.7351, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 6.716978995499887e-06, + "loss": 0.7193, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 6.698031289111825e-06, + "loss": 0.6966, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 6.679106210278682e-06, + "loss": 0.7117, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 6.660203782398383e-06, + "loss": 0.7054, + "step": 21540 + }, + { + "epoch": 1.53, + "learning_rate": 6.641324028840865e-06, + "loss": 0.712, + "step": 21550 + }, + { + "epoch": 1.53, + "learning_rate": 6.622466972948016e-06, + "loss": 0.7014, + "step": 21560 + }, + { + "epoch": 1.53, + "learning_rate": 6.603632638033683e-06, + "loss": 0.7101, + "step": 21570 + }, + { + "epoch": 1.53, + "learning_rate": 6.584821047383594e-06, + "loss": 0.7027, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 6.566032224255389e-06, + "loss": 0.7388, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 6.547266191878529e-06, + "loss": 0.6844, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 6.528522973454315e-06, + "loss": 0.6999, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 6.509802592155851e-06, + "loss": 0.7233, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 6.491105071127984e-06, + "loss": 0.6955, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 6.4724304334873e-06, + "loss": 0.7329, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 6.453778702322114e-06, + "loss": 0.7384, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 6.435149900692411e-06, + "loss": 0.6645, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 6.416544051629819e-06, + "loss": 0.7142, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 6.397961178137584e-06, + "loss": 0.7009, + "step": 21680 + }, + { + "epoch": 1.54, + "learning_rate": 6.3794013031905685e-06, + "loss": 0.6876, + "step": 21690 + }, + { + "epoch": 1.54, + "learning_rate": 6.36086444973519e-06, + "loss": 0.7037, + "step": 21700 + }, + { + "epoch": 1.54, + "learning_rate": 6.342350640689393e-06, + "loss": 0.7337, + "step": 21710 + }, + { + "epoch": 1.54, + "learning_rate": 6.323859898942649e-06, + "loss": 0.7101, + "step": 21720 + }, + { + "epoch": 1.54, + "learning_rate": 6.305392247355893e-06, + "loss": 0.7238, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 6.2869477087615315e-06, + "loss": 0.7183, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 6.268526305963374e-06, + "loss": 0.6999, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 6.250128061736646e-06, + "loss": 0.697, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 6.231752998827925e-06, + "loss": 0.7193, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 6.213401139955144e-06, + "loss": 0.7374, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 6.195072507807529e-06, + "loss": 0.7121, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 6.17676712504561e-06, + "loss": 0.6946, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 6.1584850143011546e-06, + "loss": 0.7179, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 6.140226198177176e-06, + "loss": 0.6801, + "step": 21820 + }, + { + "epoch": 1.55, + "learning_rate": 6.121990699247865e-06, + "loss": 0.7136, + "step": 21830 + }, + { + "epoch": 1.55, + "learning_rate": 6.103778540058611e-06, + "loss": 0.7195, + "step": 21840 + }, + { + "epoch": 1.55, + "learning_rate": 6.085589743125919e-06, + "loss": 0.683, + "step": 21850 + }, + { + "epoch": 1.55, + "learning_rate": 6.067424330937438e-06, + "loss": 0.7171, + "step": 21860 + }, + { + "epoch": 1.55, + "learning_rate": 6.0492823259518795e-06, + "loss": 0.7437, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 6.0311637505990394e-06, + "loss": 0.6891, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 6.013068627279725e-06, + "loss": 0.7259, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.994996978365763e-06, + "loss": 0.7382, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.97694882619996e-06, + "loss": 0.7512, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.9589241930960635e-06, + "loss": 0.7028, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.940923101338733e-06, + "loss": 0.7125, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.922945573183544e-06, + "loss": 0.707, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.90499163085694e-06, + "loss": 0.706, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.887061296556179e-06, + "loss": 0.7613, + "step": 21960 + }, + { + "epoch": 1.56, + "learning_rate": 5.869154592449364e-06, + "loss": 0.751, + "step": 21970 + }, + { + "epoch": 1.56, + "learning_rate": 5.8512715406753486e-06, + "loss": 0.7164, + "step": 21980 + }, + { + "epoch": 1.56, + "learning_rate": 5.8334121633437794e-06, + "loss": 0.7117, + "step": 21990 + }, + { + "epoch": 1.56, + "learning_rate": 5.815576482534999e-06, + "loss": 0.7227, + "step": 22000 + }, + { + "epoch": 1.56, + "learning_rate": 5.797764520300083e-06, + "loss": 0.687, + "step": 22010 + }, + { + "epoch": 1.56, + "learning_rate": 5.7799762986607585e-06, + "loss": 0.6959, + "step": 22020 + }, + { + "epoch": 1.56, + "learning_rate": 5.762211839609424e-06, + "loss": 0.6949, + "step": 22030 + }, + { + "epoch": 1.56, + "learning_rate": 5.744471165109069e-06, + "loss": 0.7237, + "step": 22040 + }, + { + "epoch": 1.56, + "learning_rate": 5.726754297093315e-06, + "loss": 0.718, + "step": 22050 + }, + { + "epoch": 1.56, + "learning_rate": 5.709061257466314e-06, + "loss": 0.7166, + "step": 22060 + }, + { + "epoch": 1.56, + "learning_rate": 5.691392068102786e-06, + "loss": 0.6881, + "step": 22070 + }, + { + "epoch": 1.56, + "learning_rate": 5.673746750847938e-06, + "loss": 0.7015, + "step": 22080 + }, + { + "epoch": 1.56, + "learning_rate": 5.656125327517495e-06, + "loss": 0.7148, + "step": 22090 + }, + { + "epoch": 1.56, + "learning_rate": 5.638527819897607e-06, + "loss": 0.7374, + "step": 22100 + }, + { + "epoch": 1.57, + "learning_rate": 5.620954249744884e-06, + "loss": 0.6898, + "step": 22110 + }, + { + "epoch": 1.57, + "learning_rate": 5.6034046387863165e-06, + "loss": 0.7184, + "step": 22120 + }, + { + "epoch": 1.57, + "learning_rate": 5.585879008719297e-06, + "loss": 0.7096, + "step": 22130 + }, + { + "epoch": 1.57, + "learning_rate": 5.568377381211548e-06, + "loss": 0.6917, + "step": 22140 + }, + { + "epoch": 1.57, + "learning_rate": 5.550899777901136e-06, + "loss": 0.7112, + "step": 22150 + }, + { + "epoch": 1.57, + "learning_rate": 5.533446220396404e-06, + "loss": 0.7252, + "step": 22160 + }, + { + "epoch": 1.57, + "learning_rate": 5.5160167302759884e-06, + "loss": 0.664, + "step": 22170 + }, + { + "epoch": 1.57, + "learning_rate": 5.498611329088751e-06, + "loss": 0.7099, + "step": 22180 + }, + { + "epoch": 1.57, + "learning_rate": 5.481230038353782e-06, + "loss": 0.7, + "step": 22190 + }, + { + "epoch": 1.57, + "learning_rate": 5.463872879560366e-06, + "loss": 0.7235, + "step": 22200 + }, + { + "epoch": 1.57, + "learning_rate": 5.4465398741679386e-06, + "loss": 0.6844, + "step": 22210 + }, + { + "epoch": 1.57, + "learning_rate": 5.42923104360609e-06, + "loss": 0.7504, + "step": 22220 + }, + { + "epoch": 1.57, + "learning_rate": 5.411946409274501e-06, + "loss": 0.6676, + "step": 22230 + }, + { + "epoch": 1.57, + "learning_rate": 5.394685992542964e-06, + "loss": 0.7014, + "step": 22240 + }, + { + "epoch": 1.57, + "learning_rate": 5.377449814751304e-06, + "loss": 0.7109, + "step": 22250 + }, + { + "epoch": 1.58, + "learning_rate": 5.3602378972094e-06, + "loss": 0.7328, + "step": 22260 + }, + { + "epoch": 1.58, + "learning_rate": 5.343050261197116e-06, + "loss": 0.6915, + "step": 22270 + }, + { + "epoch": 1.58, + "learning_rate": 5.325886927964319e-06, + "loss": 0.6845, + "step": 22280 + }, + { + "epoch": 1.58, + "learning_rate": 5.308747918730806e-06, + "loss": 0.7038, + "step": 22290 + }, + { + "epoch": 1.58, + "learning_rate": 5.29163325468632e-06, + "loss": 0.6908, + "step": 22300 + }, + { + "epoch": 1.58, + "learning_rate": 5.274542956990491e-06, + "loss": 0.7001, + "step": 22310 + }, + { + "epoch": 1.58, + "learning_rate": 5.257477046772844e-06, + "loss": 0.7159, + "step": 22320 + }, + { + "epoch": 1.58, + "learning_rate": 5.240435545132716e-06, + "loss": 0.705, + "step": 22330 + }, + { + "epoch": 1.58, + "learning_rate": 5.22341847313931e-06, + "loss": 0.6825, + "step": 22340 + }, + { + "epoch": 1.58, + "learning_rate": 5.206425851831592e-06, + "loss": 0.7245, + "step": 22350 + }, + { + "epoch": 1.58, + "learning_rate": 5.18945770221832e-06, + "loss": 0.7323, + "step": 22360 + }, + { + "epoch": 1.58, + "learning_rate": 5.172514045277979e-06, + "loss": 0.7015, + "step": 22370 + }, + { + "epoch": 1.58, + "learning_rate": 5.155594901958791e-06, + "loss": 0.7121, + "step": 22380 + }, + { + "epoch": 1.58, + "learning_rate": 5.13870029317865e-06, + "loss": 0.7172, + "step": 22390 + }, + { + "epoch": 1.59, + "learning_rate": 5.12183023982514e-06, + "loss": 0.7217, + "step": 22400 + }, + { + "epoch": 1.59, + "learning_rate": 5.1049847627554634e-06, + "loss": 0.6898, + "step": 22410 + }, + { + "epoch": 1.59, + "learning_rate": 5.088163882796448e-06, + "loss": 0.699, + "step": 22420 + }, + { + "epoch": 1.59, + "learning_rate": 5.071367620744527e-06, + "loss": 0.7336, + "step": 22430 + }, + { + "epoch": 1.59, + "learning_rate": 5.054595997365671e-06, + "loss": 0.7309, + "step": 22440 + }, + { + "epoch": 1.59, + "learning_rate": 5.037849033395392e-06, + "loss": 0.6978, + "step": 22450 + }, + { + "epoch": 1.59, + "learning_rate": 5.0211267495387295e-06, + "loss": 0.7039, + "step": 22460 + }, + { + "epoch": 1.59, + "learning_rate": 5.004429166470209e-06, + "loss": 0.7153, + "step": 22470 + }, + { + "epoch": 1.59, + "learning_rate": 4.987756304833796e-06, + "loss": 0.6851, + "step": 22480 + }, + { + "epoch": 1.59, + "learning_rate": 4.972771883223115e-06, + "loss": 0.7255, + "step": 22490 + }, + { + "epoch": 1.59, + "learning_rate": 4.956146049072402e-06, + "loss": 0.7188, + "step": 22500 + }, + { + "epoch": 1.59, + "learning_rate": 4.939544996048415e-06, + "loss": 0.7236, + "step": 22510 + }, + { + "epoch": 1.59, + "learning_rate": 4.922968744675788e-06, + "loss": 0.7312, + "step": 22520 + }, + { + "epoch": 1.59, + "learning_rate": 4.9064173154485086e-06, + "loss": 0.7279, + "step": 22530 + }, + { + "epoch": 1.6, + "learning_rate": 4.889890728829832e-06, + "loss": 0.6995, + "step": 22540 + }, + { + "epoch": 1.6, + "learning_rate": 4.8733890052523434e-06, + "loss": 0.7013, + "step": 22550 + }, + { + "epoch": 1.6, + "learning_rate": 4.856912165117871e-06, + "loss": 0.6899, + "step": 22560 + }, + { + "epoch": 1.6, + "learning_rate": 4.840460228797489e-06, + "loss": 0.698, + "step": 22570 + }, + { + "epoch": 1.6, + "learning_rate": 4.824033216631463e-06, + "loss": 0.7089, + "step": 22580 + }, + { + "epoch": 1.6, + "learning_rate": 4.807631148929248e-06, + "loss": 0.718, + "step": 22590 + }, + { + "epoch": 1.6, + "learning_rate": 4.791254045969476e-06, + "loss": 0.7047, + "step": 22600 + }, + { + "epoch": 1.6, + "learning_rate": 4.774901927999906e-06, + "loss": 0.7076, + "step": 22610 + }, + { + "epoch": 1.6, + "learning_rate": 4.758574815237396e-06, + "loss": 0.7187, + "step": 22620 + }, + { + "epoch": 1.6, + "learning_rate": 4.742272727867894e-06, + "loss": 0.7161, + "step": 22630 + }, + { + "epoch": 1.6, + "learning_rate": 4.7259956860464165e-06, + "loss": 0.7227, + "step": 22640 + }, + { + "epoch": 1.6, + "learning_rate": 4.711367778983819e-06, + "loss": 0.7202, + "step": 22650 + }, + { + "epoch": 1.6, + "learning_rate": 4.695138379119721e-06, + "loss": 0.7038, + "step": 22660 + }, + { + "epoch": 1.6, + "learning_rate": 4.678934083077979e-06, + "loss": 0.7102, + "step": 22670 + }, + { + "epoch": 1.61, + "learning_rate": 4.662754910892711e-06, + "loss": 0.6974, + "step": 22680 + }, + { + "epoch": 1.61, + "learning_rate": 4.646600882566954e-06, + "loss": 0.6962, + "step": 22690 + }, + { + "epoch": 1.61, + "learning_rate": 4.630472018072659e-06, + "loss": 0.6789, + "step": 22700 + }, + { + "epoch": 1.61, + "learning_rate": 4.614368337350686e-06, + "loss": 0.7192, + "step": 22710 + }, + { + "epoch": 1.61, + "learning_rate": 4.598289860310745e-06, + "loss": 0.6817, + "step": 22720 + }, + { + "epoch": 1.61, + "learning_rate": 4.582236606831378e-06, + "loss": 0.7246, + "step": 22730 + }, + { + "epoch": 1.61, + "learning_rate": 4.566208596759963e-06, + "loss": 0.7084, + "step": 22740 + }, + { + "epoch": 1.61, + "learning_rate": 4.550205849912648e-06, + "loss": 0.691, + "step": 22750 + }, + { + "epoch": 1.61, + "learning_rate": 4.534228386074363e-06, + "loss": 0.7319, + "step": 22760 + }, + { + "epoch": 1.61, + "learning_rate": 4.51827622499876e-06, + "loss": 0.7048, + "step": 22770 + }, + { + "epoch": 1.61, + "learning_rate": 4.502349386408236e-06, + "loss": 0.7237, + "step": 22780 + }, + { + "epoch": 1.61, + "learning_rate": 4.48644788999385e-06, + "loss": 0.6948, + "step": 22790 + }, + { + "epoch": 1.61, + "learning_rate": 4.470571755415354e-06, + "loss": 0.7186, + "step": 22800 + }, + { + "epoch": 1.61, + "learning_rate": 4.454721002301127e-06, + "loss": 0.7407, + "step": 22810 + }, + { + "epoch": 1.62, + "learning_rate": 4.438895650248184e-06, + "loss": 0.7064, + "step": 22820 + }, + { + "epoch": 1.62, + "learning_rate": 4.423095718822112e-06, + "loss": 0.6924, + "step": 22830 + }, + { + "epoch": 1.62, + "learning_rate": 4.4073212275570954e-06, + "loss": 0.7243, + "step": 22840 + }, + { + "epoch": 1.62, + "learning_rate": 4.3915721959558534e-06, + "loss": 0.7193, + "step": 22850 + }, + { + "epoch": 1.62, + "learning_rate": 4.37584864348963e-06, + "loss": 0.7117, + "step": 22860 + }, + { + "epoch": 1.62, + "learning_rate": 4.360150589598156e-06, + "loss": 0.692, + "step": 22870 + }, + { + "epoch": 1.62, + "learning_rate": 4.344478053689652e-06, + "loss": 0.7245, + "step": 22880 + }, + { + "epoch": 1.62, + "learning_rate": 4.328831055140798e-06, + "loss": 0.7022, + "step": 22890 + }, + { + "epoch": 1.62, + "learning_rate": 4.313209613296679e-06, + "loss": 0.7265, + "step": 22900 + }, + { + "epoch": 1.62, + "learning_rate": 4.297613747470789e-06, + "loss": 0.7039, + "step": 22910 + }, + { + "epoch": 1.62, + "learning_rate": 4.282043476945008e-06, + "loss": 0.6811, + "step": 22920 + }, + { + "epoch": 1.62, + "learning_rate": 4.2664988209695775e-06, + "loss": 0.6649, + "step": 22930 + }, + { + "epoch": 1.62, + "learning_rate": 4.250979798763052e-06, + "loss": 0.6998, + "step": 22940 + }, + { + "epoch": 1.62, + "learning_rate": 4.2354864295123e-06, + "loss": 0.7433, + "step": 22950 + }, + { + "epoch": 1.63, + "learning_rate": 4.220018732372485e-06, + "loss": 0.7184, + "step": 22960 + }, + { + "epoch": 1.63, + "learning_rate": 4.204576726467027e-06, + "loss": 0.7101, + "step": 22970 + }, + { + "epoch": 1.63, + "learning_rate": 4.1891604308875706e-06, + "loss": 0.7722, + "step": 22980 + }, + { + "epoch": 1.63, + "learning_rate": 4.17376986469398e-06, + "loss": 0.7269, + "step": 22990 + }, + { + "epoch": 1.63, + "learning_rate": 4.158405046914315e-06, + "loss": 0.6903, + "step": 23000 + }, + { + "epoch": 1.63, + "learning_rate": 4.143065996544804e-06, + "loss": 0.7359, + "step": 23010 + }, + { + "epoch": 1.63, + "learning_rate": 4.1277527325498e-06, + "loss": 0.6894, + "step": 23020 + }, + { + "epoch": 1.63, + "learning_rate": 4.112465273861799e-06, + "loss": 0.7237, + "step": 23030 + }, + { + "epoch": 1.63, + "learning_rate": 4.097203639381364e-06, + "loss": 0.7028, + "step": 23040 + }, + { + "epoch": 1.63, + "learning_rate": 4.081967847977164e-06, + "loss": 0.7038, + "step": 23050 + }, + { + "epoch": 1.63, + "learning_rate": 4.066757918485886e-06, + "loss": 0.711, + "step": 23060 + }, + { + "epoch": 1.63, + "learning_rate": 4.0515738697122694e-06, + "loss": 0.717, + "step": 23070 + }, + { + "epoch": 1.63, + "learning_rate": 4.036415720429027e-06, + "loss": 0.7134, + "step": 23080 + }, + { + "epoch": 1.63, + "learning_rate": 4.021283489376876e-06, + "loss": 0.709, + "step": 23090 + }, + { + "epoch": 1.64, + "learning_rate": 4.006177195264488e-06, + "loss": 0.7266, + "step": 23100 + }, + { + "epoch": 1.64, + "learning_rate": 3.9910968567684506e-06, + "loss": 0.6872, + "step": 23110 + }, + { + "epoch": 1.64, + "learning_rate": 3.976042492533269e-06, + "loss": 0.7256, + "step": 23120 + }, + { + "epoch": 1.64, + "learning_rate": 3.961014121171342e-06, + "loss": 0.7437, + "step": 23130 + }, + { + "epoch": 1.64, + "learning_rate": 3.946011761262932e-06, + "loss": 0.7111, + "step": 23140 + }, + { + "epoch": 1.64, + "learning_rate": 3.931035431356134e-06, + "loss": 0.697, + "step": 23150 + }, + { + "epoch": 1.64, + "learning_rate": 3.916085149966856e-06, + "loss": 0.7258, + "step": 23160 + }, + { + "epoch": 1.64, + "learning_rate": 3.901160935578815e-06, + "loss": 0.7029, + "step": 23170 + }, + { + "epoch": 1.64, + "learning_rate": 3.8862628066435065e-06, + "loss": 0.686, + "step": 23180 + }, + { + "epoch": 1.64, + "learning_rate": 3.8713907815801534e-06, + "loss": 0.6994, + "step": 23190 + }, + { + "epoch": 1.64, + "learning_rate": 3.856544878775708e-06, + "loss": 0.7039, + "step": 23200 + }, + { + "epoch": 1.64, + "learning_rate": 3.841725116584846e-06, + "loss": 0.7096, + "step": 23210 + }, + { + "epoch": 1.64, + "learning_rate": 3.8269315133299126e-06, + "loss": 0.7029, + "step": 23220 + }, + { + "epoch": 1.64, + "learning_rate": 3.8121640873009067e-06, + "loss": 0.7133, + "step": 23230 + }, + { + "epoch": 1.64, + "learning_rate": 3.7974228567554617e-06, + "loss": 0.7054, + "step": 23240 + }, + { + "epoch": 1.65, + "learning_rate": 3.7827078399188393e-06, + "loss": 0.7077, + "step": 23250 + }, + { + "epoch": 1.65, + "learning_rate": 3.7680190549838847e-06, + "loss": 0.6985, + "step": 23260 + }, + { + "epoch": 1.65, + "learning_rate": 3.753356520111004e-06, + "loss": 0.7222, + "step": 23270 + }, + { + "epoch": 1.65, + "learning_rate": 3.738720253428152e-06, + "loss": 0.7174, + "step": 23280 + }, + { + "epoch": 1.65, + "learning_rate": 3.724110273030812e-06, + "loss": 0.6935, + "step": 23290 + }, + { + "epoch": 1.65, + "learning_rate": 3.709526596981977e-06, + "loss": 0.7094, + "step": 23300 + }, + { + "epoch": 1.65, + "learning_rate": 3.6949692433120907e-06, + "loss": 0.7029, + "step": 23310 + }, + { + "epoch": 1.65, + "learning_rate": 3.6804382300190844e-06, + "loss": 0.7145, + "step": 23320 + }, + { + "epoch": 1.65, + "learning_rate": 3.665933575068298e-06, + "loss": 0.7247, + "step": 23330 + }, + { + "epoch": 1.65, + "learning_rate": 3.6514552963925004e-06, + "loss": 0.7393, + "step": 23340 + }, + { + "epoch": 1.65, + "learning_rate": 3.637003411891854e-06, + "loss": 0.7352, + "step": 23350 + }, + { + "epoch": 1.65, + "learning_rate": 3.622577939433866e-06, + "loss": 0.6873, + "step": 23360 + }, + { + "epoch": 1.65, + "learning_rate": 3.6081788968534066e-06, + "loss": 0.7056, + "step": 23370 + }, + { + "epoch": 1.65, + "learning_rate": 3.5938063019526653e-06, + "loss": 0.7287, + "step": 23380 + }, + { + "epoch": 1.66, + "learning_rate": 3.579460172501142e-06, + "loss": 0.717, + "step": 23390 + }, + { + "epoch": 1.66, + "learning_rate": 3.5651405262356024e-06, + "loss": 0.7258, + "step": 23400 + }, + { + "epoch": 1.66, + "learning_rate": 3.5508473808600674e-06, + "loss": 0.6985, + "step": 23410 + }, + { + "epoch": 1.66, + "learning_rate": 3.5365807540458097e-06, + "loss": 0.7059, + "step": 23420 + }, + { + "epoch": 1.66, + "learning_rate": 3.522340663431314e-06, + "loss": 0.7047, + "step": 23430 + }, + { + "epoch": 1.66, + "learning_rate": 3.5081271266222434e-06, + "loss": 0.7064, + "step": 23440 + }, + { + "epoch": 1.66, + "learning_rate": 3.4939401611914337e-06, + "loss": 0.6804, + "step": 23450 + }, + { + "epoch": 1.66, + "learning_rate": 3.479779784678877e-06, + "loss": 0.7099, + "step": 23460 + }, + { + "epoch": 1.66, + "learning_rate": 3.465646014591703e-06, + "loss": 0.7182, + "step": 23470 + }, + { + "epoch": 1.66, + "learning_rate": 3.4515388684041193e-06, + "loss": 0.6964, + "step": 23480 + }, + { + "epoch": 1.66, + "learning_rate": 3.437458363557433e-06, + "loss": 0.707, + "step": 23490 + }, + { + "epoch": 1.66, + "learning_rate": 3.4234045174600103e-06, + "loss": 0.6729, + "step": 23500 + }, + { + "epoch": 1.66, + "learning_rate": 3.409377347487272e-06, + "loss": 0.6822, + "step": 23510 + }, + { + "epoch": 1.66, + "learning_rate": 3.395376870981634e-06, + "loss": 0.7339, + "step": 23520 + }, + { + "epoch": 1.67, + "learning_rate": 3.3814031052525175e-06, + "loss": 0.7011, + "step": 23530 + }, + { + "epoch": 1.67, + "learning_rate": 3.367456067576327e-06, + "loss": 0.7216, + "step": 23540 + }, + { + "epoch": 1.67, + "learning_rate": 3.353535775196423e-06, + "loss": 0.7194, + "step": 23550 + }, + { + "epoch": 1.67, + "learning_rate": 3.339642245323102e-06, + "loss": 0.7163, + "step": 23560 + }, + { + "epoch": 1.67, + "learning_rate": 3.325775495133546e-06, + "loss": 0.736, + "step": 23570 + }, + { + "epoch": 1.67, + "learning_rate": 3.31193554177186e-06, + "loss": 0.7071, + "step": 23580 + }, + { + "epoch": 1.67, + "learning_rate": 3.298122402349002e-06, + "loss": 0.6889, + "step": 23590 + }, + { + "epoch": 1.67, + "learning_rate": 3.2843360939427943e-06, + "loss": 0.6933, + "step": 23600 + }, + { + "epoch": 1.67, + "learning_rate": 3.270576633597866e-06, + "loss": 0.699, + "step": 23610 + }, + { + "epoch": 1.67, + "learning_rate": 3.2568440383256598e-06, + "loss": 0.6603, + "step": 23620 + }, + { + "epoch": 1.67, + "learning_rate": 3.243138325104411e-06, + "loss": 0.6971, + "step": 23630 + }, + { + "epoch": 1.67, + "learning_rate": 3.230826181364585e-06, + "loss": 0.7269, + "step": 23640 + }, + { + "epoch": 1.67, + "learning_rate": 3.217171590696141e-06, + "loss": 0.7277, + "step": 23650 + }, + { + "epoch": 1.67, + "learning_rate": 3.2035439311275006e-06, + "loss": 0.6943, + "step": 23660 + }, + { + "epoch": 1.68, + "learning_rate": 3.1899432195071575e-06, + "loss": 0.7204, + "step": 23670 + }, + { + "epoch": 1.68, + "learning_rate": 3.176369472650292e-06, + "loss": 0.7241, + "step": 23680 + }, + { + "epoch": 1.68, + "learning_rate": 3.1628227073387474e-06, + "loss": 0.6712, + "step": 23690 + }, + { + "epoch": 1.68, + "learning_rate": 3.1493029403209973e-06, + "loss": 0.6877, + "step": 23700 + }, + { + "epoch": 1.68, + "learning_rate": 3.1358101883121547e-06, + "loss": 0.6953, + "step": 23710 + }, + { + "epoch": 1.68, + "learning_rate": 3.122344467993907e-06, + "loss": 0.6918, + "step": 23720 + }, + { + "epoch": 1.68, + "learning_rate": 3.1089057960145498e-06, + "loss": 0.6866, + "step": 23730 + }, + { + "epoch": 1.68, + "learning_rate": 3.0954941889889096e-06, + "loss": 0.6975, + "step": 23740 + }, + { + "epoch": 1.68, + "learning_rate": 3.082109663498378e-06, + "loss": 0.7213, + "step": 23750 + }, + { + "epoch": 1.68, + "learning_rate": 3.068752236090841e-06, + "loss": 0.7225, + "step": 23760 + }, + { + "epoch": 1.68, + "learning_rate": 3.055421923280702e-06, + "loss": 0.7064, + "step": 23770 + }, + { + "epoch": 1.68, + "learning_rate": 3.0421187415488246e-06, + "loss": 0.696, + "step": 23780 + }, + { + "epoch": 1.68, + "learning_rate": 3.028842707342541e-06, + "loss": 0.7251, + "step": 23790 + }, + { + "epoch": 1.68, + "learning_rate": 3.0155938370756116e-06, + "loss": 0.7075, + "step": 23800 + }, + { + "epoch": 1.69, + "learning_rate": 3.0023721471282214e-06, + "loss": 0.7181, + "step": 23810 + }, + { + "epoch": 1.69, + "learning_rate": 2.9891776538469362e-06, + "loss": 0.6982, + "step": 23820 + }, + { + "epoch": 1.69, + "learning_rate": 2.9760103735447186e-06, + "loss": 0.6984, + "step": 23830 + }, + { + "epoch": 1.69, + "learning_rate": 2.962870322500866e-06, + "loss": 0.7363, + "step": 23840 + }, + { + "epoch": 1.69, + "learning_rate": 2.9497575169610243e-06, + "loss": 0.6743, + "step": 23850 + }, + { + "epoch": 1.69, + "learning_rate": 2.9366719731371563e-06, + "loss": 0.7141, + "step": 23860 + }, + { + "epoch": 1.69, + "learning_rate": 2.9236137072075067e-06, + "loss": 0.7228, + "step": 23870 + }, + { + "epoch": 1.69, + "learning_rate": 2.910582735316597e-06, + "loss": 0.7028, + "step": 23880 + }, + { + "epoch": 1.69, + "learning_rate": 2.8975790735752186e-06, + "loss": 0.7098, + "step": 23890 + }, + { + "epoch": 1.69, + "learning_rate": 2.8846027380603908e-06, + "loss": 0.6907, + "step": 23900 + }, + { + "epoch": 1.69, + "learning_rate": 2.8716537448153364e-06, + "loss": 0.7226, + "step": 23910 + }, + { + "epoch": 1.69, + "learning_rate": 2.8587321098494963e-06, + "loss": 0.7298, + "step": 23920 + }, + { + "epoch": 1.69, + "learning_rate": 2.8458378491384606e-06, + "loss": 0.7172, + "step": 23930 + }, + { + "epoch": 1.69, + "learning_rate": 2.832970978624e-06, + "loss": 0.7065, + "step": 23940 + }, + { + "epoch": 1.7, + "learning_rate": 2.8201315142140055e-06, + "loss": 0.6787, + "step": 23950 + }, + { + "epoch": 1.7, + "learning_rate": 2.8073194717824935e-06, + "loss": 0.6846, + "step": 23960 + }, + { + "epoch": 1.7, + "learning_rate": 2.794534867169568e-06, + "loss": 0.7014, + "step": 23970 + }, + { + "epoch": 1.7, + "learning_rate": 2.7817777161814245e-06, + "loss": 0.721, + "step": 23980 + }, + { + "epoch": 1.7, + "learning_rate": 2.769048034590299e-06, + "loss": 0.7459, + "step": 23990 + }, + { + "epoch": 1.7, + "learning_rate": 2.7563458381344858e-06, + "loss": 0.6801, + "step": 24000 + }, + { + "epoch": 1.7, + "learning_rate": 2.7436711425182726e-06, + "loss": 0.7142, + "step": 24010 + }, + { + "epoch": 1.7, + "learning_rate": 2.7310239634119744e-06, + "loss": 0.6988, + "step": 24020 + }, + { + "epoch": 1.7, + "learning_rate": 2.718404316451864e-06, + "loss": 0.7297, + "step": 24030 + }, + { + "epoch": 1.7, + "learning_rate": 2.7058122172401916e-06, + "loss": 0.672, + "step": 24040 + }, + { + "epoch": 1.7, + "learning_rate": 2.693247681345132e-06, + "loss": 0.7278, + "step": 24050 + }, + { + "epoch": 1.7, + "learning_rate": 2.680710724300803e-06, + "loss": 0.7055, + "step": 24060 + }, + { + "epoch": 1.7, + "learning_rate": 2.6682013616072005e-06, + "loss": 0.7266, + "step": 24070 + }, + { + "epoch": 1.7, + "learning_rate": 2.655719608730231e-06, + "loss": 0.7121, + "step": 24080 + }, + { + "epoch": 1.71, + "learning_rate": 2.6432654811016395e-06, + "loss": 0.6943, + "step": 24090 + }, + { + "epoch": 1.71, + "learning_rate": 2.630838994119042e-06, + "loss": 0.709, + "step": 24100 + }, + { + "epoch": 1.71, + "learning_rate": 2.618440163145855e-06, + "loss": 0.7115, + "step": 24110 + }, + { + "epoch": 1.71, + "learning_rate": 2.60606900351133e-06, + "loss": 0.7056, + "step": 24120 + }, + { + "epoch": 1.71, + "learning_rate": 2.5937255305104825e-06, + "loss": 0.7071, + "step": 24130 + }, + { + "epoch": 1.71, + "learning_rate": 2.581409759404113e-06, + "loss": 0.6953, + "step": 24140 + }, + { + "epoch": 1.71, + "learning_rate": 2.5691217054187726e-06, + "loss": 0.7178, + "step": 24150 + }, + { + "epoch": 1.71, + "learning_rate": 2.556861383746731e-06, + "loss": 0.7034, + "step": 24160 + }, + { + "epoch": 1.71, + "learning_rate": 2.54462880954599e-06, + "loss": 0.7028, + "step": 24170 + }, + { + "epoch": 1.71, + "learning_rate": 2.532423997940231e-06, + "loss": 0.6898, + "step": 24180 + }, + { + "epoch": 1.71, + "learning_rate": 2.5202469640188187e-06, + "loss": 0.7106, + "step": 24190 + }, + { + "epoch": 1.71, + "learning_rate": 2.508097722836769e-06, + "loss": 0.7149, + "step": 24200 + }, + { + "epoch": 1.71, + "learning_rate": 2.4959762894147503e-06, + "loss": 0.7059, + "step": 24210 + }, + { + "epoch": 1.71, + "learning_rate": 2.48388267873903e-06, + "loss": 0.6858, + "step": 24220 + }, + { + "epoch": 1.72, + "learning_rate": 2.4718169057614953e-06, + "loss": 0.6949, + "step": 24230 + }, + { + "epoch": 1.72, + "learning_rate": 2.4597789853996022e-06, + "loss": 0.7374, + "step": 24240 + }, + { + "epoch": 1.72, + "learning_rate": 2.4477689325363875e-06, + "loss": 0.6925, + "step": 24250 + }, + { + "epoch": 1.72, + "learning_rate": 2.4357867620204174e-06, + "loss": 0.7254, + "step": 24260 + }, + { + "epoch": 1.72, + "learning_rate": 2.4238324886657976e-06, + "loss": 0.7108, + "step": 24270 + }, + { + "epoch": 1.72, + "learning_rate": 2.411906127252134e-06, + "loss": 0.7205, + "step": 24280 + }, + { + "epoch": 1.72, + "learning_rate": 2.4000076925245364e-06, + "loss": 0.7125, + "step": 24290 + }, + { + "epoch": 1.72, + "learning_rate": 2.388137199193571e-06, + "loss": 0.7166, + "step": 24300 + }, + { + "epoch": 1.72, + "learning_rate": 2.3762946619352773e-06, + "loss": 0.7536, + "step": 24310 + }, + { + "epoch": 1.72, + "learning_rate": 2.364480095391114e-06, + "loss": 0.6741, + "step": 24320 + }, + { + "epoch": 1.72, + "learning_rate": 2.3526935141679695e-06, + "loss": 0.7399, + "step": 24330 + }, + { + "epoch": 1.72, + "learning_rate": 2.3409349328381264e-06, + "loss": 0.6885, + "step": 24340 + }, + { + "epoch": 1.72, + "learning_rate": 2.329204365939261e-06, + "loss": 0.6654, + "step": 24350 + }, + { + "epoch": 1.72, + "learning_rate": 2.3175018279743943e-06, + "loss": 0.7019, + "step": 24360 + }, + { + "epoch": 1.72, + "learning_rate": 2.305827333411914e-06, + "loss": 0.6802, + "step": 24370 + }, + { + "epoch": 1.73, + "learning_rate": 2.2941808966855217e-06, + "loss": 0.6845, + "step": 24380 + }, + { + "epoch": 1.73, + "learning_rate": 2.2825625321942433e-06, + "loss": 0.7087, + "step": 24390 + }, + { + "epoch": 1.73, + "learning_rate": 2.270972254302381e-06, + "loss": 0.6803, + "step": 24400 + }, + { + "epoch": 1.73, + "learning_rate": 2.259410077339527e-06, + "loss": 0.7212, + "step": 24410 + }, + { + "epoch": 1.73, + "learning_rate": 2.2478760156005278e-06, + "loss": 0.7025, + "step": 24420 + }, + { + "epoch": 1.73, + "learning_rate": 2.2363700833454627e-06, + "loss": 0.6774, + "step": 24430 + }, + { + "epoch": 1.73, + "learning_rate": 2.2248922947996464e-06, + "loss": 0.7134, + "step": 24440 + }, + { + "epoch": 1.73, + "learning_rate": 2.2134426641535756e-06, + "loss": 0.71, + "step": 24450 + }, + { + "epoch": 1.73, + "learning_rate": 2.202021205562965e-06, + "loss": 0.7253, + "step": 24460 + }, + { + "epoch": 1.73, + "learning_rate": 2.19062793314867e-06, + "loss": 0.7008, + "step": 24470 + }, + { + "epoch": 1.73, + "learning_rate": 2.179262860996717e-06, + "loss": 0.7299, + "step": 24480 + }, + { + "epoch": 1.73, + "learning_rate": 2.1679260031582575e-06, + "loss": 0.7259, + "step": 24490 + }, + { + "epoch": 1.73, + "learning_rate": 2.1566173736495663e-06, + "loss": 0.6953, + "step": 24500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1453369864520125e-06, + "loss": 0.7024, + "step": 24510 + }, + { + "epoch": 1.74, + "learning_rate": 2.134084855512056e-06, + "loss": 0.7059, + "step": 24520 + }, + { + "epoch": 1.74, + "learning_rate": 2.1228609947412074e-06, + "loss": 0.6975, + "step": 24530 + }, + { + "epoch": 1.74, + "learning_rate": 2.111665418016051e-06, + "loss": 0.7137, + "step": 24540 + }, + { + "epoch": 1.74, + "learning_rate": 2.1004981391781697e-06, + "loss": 0.7219, + "step": 24550 + }, + { + "epoch": 1.74, + "learning_rate": 2.0893591720341888e-06, + "loss": 0.7267, + "step": 24560 + }, + { + "epoch": 1.74, + "learning_rate": 2.0782485303557144e-06, + "loss": 0.7114, + "step": 24570 + }, + { + "epoch": 1.74, + "learning_rate": 2.0671662278793387e-06, + "loss": 0.7075, + "step": 24580 + }, + { + "epoch": 1.74, + "learning_rate": 2.0561122783066128e-06, + "loss": 0.6794, + "step": 24590 + }, + { + "epoch": 1.74, + "learning_rate": 2.0450866953040436e-06, + "loss": 0.7195, + "step": 24600 + }, + { + "epoch": 1.74, + "learning_rate": 2.034089492503052e-06, + "loss": 0.7097, + "step": 24610 + }, + { + "epoch": 1.74, + "learning_rate": 2.0231206834999793e-06, + "loss": 0.7114, + "step": 24620 + }, + { + "epoch": 1.74, + "learning_rate": 2.0121802818560716e-06, + "loss": 0.7295, + "step": 24630 + }, + { + "epoch": 1.74, + "learning_rate": 2.0012683010974366e-06, + "loss": 0.7161, + "step": 24640 + }, + { + "epoch": 1.74, + "learning_rate": 1.9903847547150495e-06, + "loss": 0.6945, + "step": 24650 + }, + { + "epoch": 1.75, + "learning_rate": 1.979529656164736e-06, + "loss": 0.7025, + "step": 24660 + }, + { + "epoch": 1.75, + "learning_rate": 1.9687030188671517e-06, + "loss": 0.7047, + "step": 24670 + }, + { + "epoch": 1.75, + "learning_rate": 1.957904856207754e-06, + "loss": 0.6837, + "step": 24680 + }, + { + "epoch": 1.75, + "learning_rate": 1.9471351815367995e-06, + "loss": 0.7309, + "step": 24690 + }, + { + "epoch": 1.75, + "learning_rate": 1.9363940081693287e-06, + "loss": 0.7112, + "step": 24700 + }, + { + "epoch": 1.75, + "learning_rate": 1.92568134938515e-06, + "loss": 0.698, + "step": 24710 + }, + { + "epoch": 1.75, + "learning_rate": 1.914997218428796e-06, + "loss": 0.7365, + "step": 24720 + }, + { + "epoch": 1.75, + "learning_rate": 1.9043416285095568e-06, + "loss": 0.6988, + "step": 24730 + }, + { + "epoch": 1.75, + "learning_rate": 1.8937145928014073e-06, + "loss": 0.7022, + "step": 24740 + }, + { + "epoch": 1.75, + "learning_rate": 1.883116124443049e-06, + "loss": 0.7207, + "step": 24750 + }, + { + "epoch": 1.75, + "learning_rate": 1.8725462365378366e-06, + "loss": 0.7202, + "step": 24760 + }, + { + "epoch": 1.75, + "learning_rate": 1.862004942153811e-06, + "loss": 0.7263, + "step": 24770 + }, + { + "epoch": 1.75, + "learning_rate": 1.85149225432365e-06, + "loss": 0.7123, + "step": 24780 + }, + { + "epoch": 1.75, + "learning_rate": 1.8410081860446682e-06, + "loss": 0.7025, + "step": 24790 + }, + { + "epoch": 1.76, + "learning_rate": 1.8305527502787912e-06, + "loss": 0.7225, + "step": 24800 + }, + { + "epoch": 1.76, + "learning_rate": 1.8201259599525567e-06, + "loss": 0.6956, + "step": 24810 + }, + { + "epoch": 1.76, + "learning_rate": 1.8097278279570696e-06, + "loss": 0.716, + "step": 24820 + }, + { + "epoch": 1.76, + "learning_rate": 1.7993583671480213e-06, + "loss": 0.6895, + "step": 24830 + }, + { + "epoch": 1.76, + "learning_rate": 1.7890175903456403e-06, + "loss": 0.7089, + "step": 24840 + }, + { + "epoch": 1.76, + "learning_rate": 1.778705510334705e-06, + "loss": 0.7165, + "step": 24850 + }, + { + "epoch": 1.76, + "learning_rate": 1.7684221398645007e-06, + "loss": 0.6951, + "step": 24860 + }, + { + "epoch": 1.76, + "learning_rate": 1.7581674916488267e-06, + "loss": 0.6925, + "step": 24870 + }, + { + "epoch": 1.76, + "learning_rate": 1.7479415783659774e-06, + "loss": 0.724, + "step": 24880 + }, + { + "epoch": 1.76, + "learning_rate": 1.7377444126587094e-06, + "loss": 0.6585, + "step": 24890 + }, + { + "epoch": 1.76, + "learning_rate": 1.7275760071342379e-06, + "loss": 0.7301, + "step": 24900 + }, + { + "epoch": 1.76, + "learning_rate": 1.7174363743642257e-06, + "loss": 0.6777, + "step": 24910 + }, + { + "epoch": 1.76, + "learning_rate": 1.7073255268847676e-06, + "loss": 0.6788, + "step": 24920 + }, + { + "epoch": 1.76, + "learning_rate": 1.6972434771963586e-06, + "loss": 0.7013, + "step": 24930 + }, + { + "epoch": 1.77, + "learning_rate": 1.6871902377638915e-06, + "loss": 0.6847, + "step": 24940 + }, + { + "epoch": 1.77, + "learning_rate": 1.6771658210166436e-06, + "loss": 0.7232, + "step": 24950 + }, + { + "epoch": 1.77, + "learning_rate": 1.6671702393482646e-06, + "loss": 0.7024, + "step": 24960 + }, + { + "epoch": 1.77, + "learning_rate": 1.657203505116739e-06, + "loss": 0.674, + "step": 24970 + }, + { + "epoch": 1.77, + "learning_rate": 1.647265630644393e-06, + "loss": 0.7264, + "step": 24980 + }, + { + "epoch": 1.77, + "learning_rate": 1.6373566282178704e-06, + "loss": 0.703, + "step": 24990 + }, + { + "epoch": 1.77, + "learning_rate": 1.6274765100881272e-06, + "loss": 0.7121, + "step": 25000 + }, + { + "epoch": 1.77, + "learning_rate": 1.6176252884703924e-06, + "loss": 0.7126, + "step": 25010 + }, + { + "epoch": 1.77, + "learning_rate": 1.6078029755441926e-06, + "loss": 0.7127, + "step": 25020 + }, + { + "epoch": 1.77, + "learning_rate": 1.5980095834532832e-06, + "loss": 0.7084, + "step": 25030 + }, + { + "epoch": 1.77, + "learning_rate": 1.5882451243056928e-06, + "loss": 0.6787, + "step": 25040 + }, + { + "epoch": 1.77, + "learning_rate": 1.578509610173659e-06, + "loss": 0.7314, + "step": 25050 + }, + { + "epoch": 1.77, + "learning_rate": 1.5688030530936425e-06, + "loss": 0.7144, + "step": 25060 + }, + { + "epoch": 1.77, + "learning_rate": 1.559125465066294e-06, + "loss": 0.7208, + "step": 25070 + }, + { + "epoch": 1.78, + "learning_rate": 1.5494768580564618e-06, + "loss": 0.7474, + "step": 25080 + }, + { + "epoch": 1.78, + "learning_rate": 1.5398572439931591e-06, + "loss": 0.7082, + "step": 25090 + }, + { + "epoch": 1.78, + "learning_rate": 1.5302666347695477e-06, + "loss": 0.7083, + "step": 25100 + }, + { + "epoch": 1.78, + "learning_rate": 1.5207050422429286e-06, + "loss": 0.7155, + "step": 25110 + }, + { + "epoch": 1.78, + "learning_rate": 1.5111724782347375e-06, + "loss": 0.6739, + "step": 25120 + }, + { + "epoch": 1.78, + "learning_rate": 1.501668954530519e-06, + "loss": 0.7285, + "step": 25130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4921944828799105e-06, + "loss": 0.6637, + "step": 25140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4827490749966227e-06, + "loss": 0.6976, + "step": 25150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4733327425584504e-06, + "loss": 0.7068, + "step": 25160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4639454972072337e-06, + "loss": 0.6992, + "step": 25170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4545873505488471e-06, + "loss": 0.694, + "step": 25180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4452583141531888e-06, + "loss": 0.7144, + "step": 25190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4359583995541741e-06, + "loss": 0.7099, + "step": 25200 + }, + { + "epoch": 1.78, + "learning_rate": 1.426687618249714e-06, + "loss": 0.6898, + "step": 25210 + }, + { + "epoch": 1.79, + "learning_rate": 1.4174459817016927e-06, + "loss": 0.6764, + "step": 25220 + }, + { + "epoch": 1.79, + "learning_rate": 1.4082335013359593e-06, + "loss": 0.6904, + "step": 25230 + }, + { + "epoch": 1.79, + "learning_rate": 1.3990501885423251e-06, + "loss": 0.7081, + "step": 25240 + }, + { + "epoch": 1.79, + "learning_rate": 1.3898960546745443e-06, + "loss": 0.6962, + "step": 25250 + }, + { + "epoch": 1.79, + "learning_rate": 1.3807711110502802e-06, + "loss": 0.7231, + "step": 25260 + }, + { + "epoch": 1.79, + "learning_rate": 1.3716753689511142e-06, + "loss": 0.7064, + "step": 25270 + }, + { + "epoch": 1.79, + "learning_rate": 1.3626088396225284e-06, + "loss": 0.6942, + "step": 25280 + }, + { + "epoch": 1.79, + "learning_rate": 1.353571534273887e-06, + "loss": 0.7291, + "step": 25290 + }, + { + "epoch": 1.79, + "learning_rate": 1.3445634640784133e-06, + "loss": 0.6808, + "step": 25300 + }, + { + "epoch": 1.79, + "learning_rate": 1.3355846401732015e-06, + "loss": 0.6962, + "step": 25310 + }, + { + "epoch": 1.79, + "learning_rate": 1.3266350736591744e-06, + "loss": 0.736, + "step": 25320 + }, + { + "epoch": 1.79, + "learning_rate": 1.3177147756010893e-06, + "loss": 0.6821, + "step": 25330 + }, + { + "epoch": 1.79, + "learning_rate": 1.3088237570275165e-06, + "loss": 0.6935, + "step": 25340 + }, + { + "epoch": 1.79, + "learning_rate": 1.2999620289308263e-06, + "loss": 0.7366, + "step": 25350 + }, + { + "epoch": 1.8, + "learning_rate": 1.2911296022671716e-06, + "loss": 0.7145, + "step": 25360 + }, + { + "epoch": 1.8, + "learning_rate": 1.2823264879564838e-06, + "loss": 0.6949, + "step": 25370 + }, + { + "epoch": 1.8, + "learning_rate": 1.2735526968824575e-06, + "loss": 0.7115, + "step": 25380 + }, + { + "epoch": 1.8, + "learning_rate": 1.264808239892526e-06, + "loss": 0.7214, + "step": 25390 + }, + { + "epoch": 1.8, + "learning_rate": 1.2560931277978526e-06, + "loss": 0.7128, + "step": 25400 + }, + { + "epoch": 1.8, + "learning_rate": 1.2474073713733353e-06, + "loss": 0.7109, + "step": 25410 + }, + { + "epoch": 1.8, + "learning_rate": 1.238750981357567e-06, + "loss": 0.7233, + "step": 25420 + }, + { + "epoch": 1.8, + "learning_rate": 1.2301239684528342e-06, + "loss": 0.7049, + "step": 25430 + }, + { + "epoch": 1.8, + "learning_rate": 1.2215263433250995e-06, + "loss": 0.7242, + "step": 25440 + }, + { + "epoch": 1.8, + "learning_rate": 1.2129581166040043e-06, + "loss": 0.6809, + "step": 25450 + }, + { + "epoch": 1.8, + "learning_rate": 1.2044192988828362e-06, + "loss": 0.7136, + "step": 25460 + }, + { + "epoch": 1.8, + "learning_rate": 1.1959099007185226e-06, + "loss": 0.6874, + "step": 25470 + }, + { + "epoch": 1.8, + "learning_rate": 1.1874299326316147e-06, + "loss": 0.7122, + "step": 25480 + }, + { + "epoch": 1.8, + "learning_rate": 1.1789794051062815e-06, + "loss": 0.7186, + "step": 25490 + }, + { + "epoch": 1.8, + "learning_rate": 1.1705583285903043e-06, + "loss": 0.7135, + "step": 25500 + }, + { + "epoch": 1.81, + "learning_rate": 1.162166713495033e-06, + "loss": 0.6536, + "step": 25510 + }, + { + "epoch": 1.81, + "learning_rate": 1.1538045701954047e-06, + "loss": 0.6984, + "step": 25520 + }, + { + "epoch": 1.81, + "learning_rate": 1.1454719090299131e-06, + "loss": 0.7201, + "step": 25530 + }, + { + "epoch": 1.81, + "learning_rate": 1.1371687403006147e-06, + "loss": 0.6937, + "step": 25540 + }, + { + "epoch": 1.81, + "learning_rate": 1.1288950742730898e-06, + "loss": 0.7263, + "step": 25550 + }, + { + "epoch": 1.81, + "learning_rate": 1.1206509211764415e-06, + "loss": 0.6799, + "step": 25560 + }, + { + "epoch": 1.81, + "learning_rate": 1.1124362912032949e-06, + "loss": 0.6887, + "step": 25570 + }, + { + "epoch": 1.81, + "learning_rate": 1.10425119450977e-06, + "loss": 0.7054, + "step": 25580 + }, + { + "epoch": 1.81, + "learning_rate": 1.0960956412154777e-06, + "loss": 0.7283, + "step": 25590 + }, + { + "epoch": 1.81, + "learning_rate": 1.087969641403494e-06, + "loss": 0.7374, + "step": 25600 + }, + { + "epoch": 1.81, + "learning_rate": 1.0798732051203603e-06, + "loss": 0.7013, + "step": 25610 + }, + { + "epoch": 1.81, + "learning_rate": 1.0718063423760722e-06, + "loss": 0.6999, + "step": 25620 + }, + { + "epoch": 1.81, + "learning_rate": 1.0637690631440572e-06, + "loss": 0.7123, + "step": 25630 + }, + { + "epoch": 1.81, + "learning_rate": 1.0557613773611697e-06, + "loss": 0.6978, + "step": 25640 + }, + { + "epoch": 1.82, + "learning_rate": 1.0477832949276706e-06, + "loss": 0.7393, + "step": 25650 + }, + { + "epoch": 1.82, + "learning_rate": 1.0398348257072282e-06, + "loss": 0.727, + "step": 25660 + }, + { + "epoch": 1.82, + "learning_rate": 1.0319159795268984e-06, + "loss": 0.7193, + "step": 25670 + }, + { + "epoch": 1.82, + "learning_rate": 1.024026766177108e-06, + "loss": 0.7097, + "step": 25680 + }, + { + "epoch": 1.82, + "learning_rate": 1.0161671954116464e-06, + "loss": 0.7109, + "step": 25690 + }, + { + "epoch": 1.82, + "learning_rate": 1.0083372769476629e-06, + "loss": 0.7086, + "step": 25700 + }, + { + "epoch": 1.82, + "learning_rate": 1.0005370204656418e-06, + "loss": 0.7081, + "step": 25710 + }, + { + "epoch": 1.82, + "learning_rate": 9.927664356093908e-07, + "loss": 0.6914, + "step": 25720 + }, + { + "epoch": 1.82, + "learning_rate": 9.850255319860362e-07, + "loss": 0.6929, + "step": 25730 + }, + { + "epoch": 1.82, + "learning_rate": 9.773143191660116e-07, + "loss": 0.7269, + "step": 25740 + }, + { + "epoch": 1.82, + "learning_rate": 9.696328066830378e-07, + "loss": 0.7243, + "step": 25750 + }, + { + "epoch": 1.82, + "learning_rate": 9.61981004034121e-07, + "loss": 0.718, + "step": 25760 + }, + { + "epoch": 1.82, + "learning_rate": 9.54358920679524e-07, + "loss": 0.7295, + "step": 25770 + }, + { + "epoch": 1.82, + "learning_rate": 9.46766566042781e-07, + "loss": 0.7101, + "step": 25780 + }, + { + "epoch": 1.83, + "learning_rate": 9.392039495106642e-07, + "loss": 0.7296, + "step": 25790 + }, + { + "epoch": 1.83, + "learning_rate": 9.31671080433183e-07, + "loss": 0.7022, + "step": 25800 + }, + { + "epoch": 1.83, + "learning_rate": 9.241679681235572e-07, + "loss": 0.7167, + "step": 25810 + }, + { + "epoch": 1.83, + "learning_rate": 9.166946218582301e-07, + "loss": 0.7109, + "step": 25820 + }, + { + "epoch": 1.83, + "learning_rate": 9.092510508768387e-07, + "loss": 0.7036, + "step": 25830 + }, + { + "epoch": 1.83, + "learning_rate": 9.018372643822132e-07, + "loss": 0.7064, + "step": 25840 + }, + { + "epoch": 1.83, + "learning_rate": 8.944532715403408e-07, + "loss": 0.7124, + "step": 25850 + }, + { + "epoch": 1.83, + "learning_rate": 8.87099081480397e-07, + "loss": 0.7441, + "step": 25860 + }, + { + "epoch": 1.83, + "learning_rate": 8.797747032947001e-07, + "loss": 0.7099, + "step": 25870 + }, + { + "epoch": 1.83, + "learning_rate": 8.724801460387094e-07, + "loss": 0.7087, + "step": 25880 + }, + { + "epoch": 1.83, + "learning_rate": 8.652154187310218e-07, + "loss": 0.7032, + "step": 25890 + }, + { + "epoch": 1.83, + "learning_rate": 8.579805303533417e-07, + "loss": 0.7031, + "step": 25900 + }, + { + "epoch": 1.83, + "learning_rate": 8.507754898504943e-07, + "loss": 0.6833, + "step": 25910 + }, + { + "epoch": 1.83, + "learning_rate": 8.436003061304043e-07, + "loss": 0.7052, + "step": 25920 + }, + { + "epoch": 1.84, + "learning_rate": 8.364549880640671e-07, + "loss": 0.7, + "step": 25930 + }, + { + "epoch": 1.84, + "learning_rate": 8.293395444855662e-07, + "loss": 0.7127, + "step": 25940 + }, + { + "epoch": 1.84, + "learning_rate": 8.222539841920507e-07, + "loss": 0.709, + "step": 25950 + }, + { + "epoch": 1.84, + "learning_rate": 8.151983159437215e-07, + "loss": 0.6866, + "step": 25960 + }, + { + "epoch": 1.84, + "learning_rate": 8.081725484638176e-07, + "loss": 0.7142, + "step": 25970 + }, + { + "epoch": 1.84, + "learning_rate": 8.011766904386154e-07, + "loss": 0.7077, + "step": 25980 + }, + { + "epoch": 1.84, + "learning_rate": 7.942107505174102e-07, + "loss": 0.7226, + "step": 25990 + }, + { + "epoch": 1.84, + "learning_rate": 7.872747373125156e-07, + "loss": 0.7148, + "step": 26000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 6.329063795842875e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-26000/training_args.bin b/checkpoint-26000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-26000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-27000/README.md b/checkpoint-27000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-27000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-27000/adapter_config.json b/checkpoint-27000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-27000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-27000/adapter_model.bin b/checkpoint-27000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..a9bf5dbe588ffc94a5e9c225a293be69f8d75329 --- /dev/null +++ b/checkpoint-27000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:26c33913ff15d47b0de2ee1caf1ecf8adbee35a6132deb3a5f29fb102b2b55ef +size 16821197 diff --git a/checkpoint-27000/finetuning_args.json b/checkpoint-27000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-27000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-27000/optimizer.pt b/checkpoint-27000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..167113d6631a4dbc6f34b564c128202cdc2ae33e --- /dev/null +++ b/checkpoint-27000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d6724873eb2a2c1bfa1834e7bc85257cd6aa3e81f220b8538c8c475362b37893 +size 33661637 diff --git a/checkpoint-27000/rng_state.pth b/checkpoint-27000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..3c6a228cb8bf163bb04b7928e3818915739d02ff --- /dev/null +++ b/checkpoint-27000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92cc8c3ce6699f8ff800c179ff147268a8166a0710c500aba60ed907ac201ef9 +size 18663 diff --git a/checkpoint-27000/scheduler.pt b/checkpoint-27000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..01908f170660d374c597afb8d45c8ac961c2b8f9 --- /dev/null +++ b/checkpoint-27000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aafa7e0079e30900aa50f774e69122213d422c983143aeeb1c0ed5648b197ca3 +size 627 diff --git a/checkpoint-27000/trainer_state.json b/checkpoint-27000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..472aa12a716424fa78e4243f6a1e9ad183f6ba69 --- /dev/null +++ b/checkpoint-27000/trainer_state.json @@ -0,0 +1,16216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9111323459149547, + "global_step": 27000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + }, + { + "epoch": 1.49, + "learning_rate": 7.692689245053728e-06, + "loss": 0.6843, + "step": 21010 + }, + { + "epoch": 1.49, + "learning_rate": 7.672640603614179e-06, + "loss": 0.7116, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 7.652613384775791e-06, + "loss": 0.7229, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 7.632607613299142e-06, + "loss": 0.7032, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 7.612623313918263e-06, + "loss": 0.7184, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 7.592660511340641e-06, + "loss": 0.7004, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 7.572719230247205e-06, + "loss": 0.7081, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 7.552799495292273e-06, + "loss": 0.6928, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 7.532901331103512e-06, + "loss": 0.686, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 7.513024762281914e-06, + "loss": 0.7178, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 7.493169813401799e-06, + "loss": 0.6919, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 7.473336509010742e-06, + "loss": 0.7132, + "step": 21120 + }, + { + "epoch": 1.5, + "learning_rate": 7.453524873629553e-06, + "loss": 0.7174, + "step": 21130 + }, + { + "epoch": 1.5, + "learning_rate": 7.4337349317522485e-06, + "loss": 0.7243, + "step": 21140 + }, + { + "epoch": 1.5, + "learning_rate": 7.41396670784604e-06, + "loss": 0.7158, + "step": 21150 + }, + { + "epoch": 1.5, + "learning_rate": 7.394220226351286e-06, + "loss": 0.7116, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 7.374495511681454e-06, + "loss": 0.6906, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 7.354792588223094e-06, + "loss": 0.6896, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 7.3351114803358354e-06, + "loss": 0.7078, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 7.3154522123523305e-06, + "loss": 0.7297, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 7.295814808578216e-06, + "loss": 0.6861, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 7.276199293292102e-06, + "loss": 0.6985, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 7.256605690745547e-06, + "loss": 0.7065, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 7.237034025163017e-06, + "loss": 0.7173, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 7.217484320741838e-06, + "loss": 0.7191, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 7.197956601652212e-06, + "loss": 0.7349, + "step": 21260 + }, + { + "epoch": 1.51, + "learning_rate": 7.178450892037128e-06, + "loss": 0.6995, + "step": 21270 + }, + { + "epoch": 1.51, + "learning_rate": 7.158967216012396e-06, + "loss": 0.7089, + "step": 21280 + }, + { + "epoch": 1.51, + "learning_rate": 7.139505597666557e-06, + "loss": 0.6755, + "step": 21290 + }, + { + "epoch": 1.51, + "learning_rate": 7.120066061060906e-06, + "loss": 0.6743, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 7.100648630229412e-06, + "loss": 0.7079, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 7.081253329178727e-06, + "loss": 0.7348, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 7.061880181888158e-06, + "loss": 0.7047, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 7.042529212309599e-06, + "loss": 0.7129, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 7.023200444367517e-06, + "loss": 0.6997, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 7.0038939019589605e-06, + "loss": 0.731, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 6.984609608953488e-06, + "loss": 0.7097, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 6.965347589193141e-06, + "loss": 0.7155, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 6.9461078664924216e-06, + "loss": 0.7037, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 6.926890464638277e-06, + "loss": 0.7201, + "step": 21400 + }, + { + "epoch": 1.52, + "learning_rate": 6.907695407390066e-06, + "loss": 0.7316, + "step": 21410 + }, + { + "epoch": 1.52, + "learning_rate": 6.888522718479498e-06, + "loss": 0.7124, + "step": 21420 + }, + { + "epoch": 1.52, + "learning_rate": 6.869372421610632e-06, + "loss": 0.7253, + "step": 21430 + }, + { + "epoch": 1.52, + "learning_rate": 6.85024454045986e-06, + "loss": 0.7065, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 6.831139098675854e-06, + "loss": 0.7073, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 6.812056119879534e-06, + "loss": 0.686, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 6.792995627664042e-06, + "loss": 0.6915, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 6.773957645594742e-06, + "loss": 0.7059, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 6.754942197209163e-06, + "loss": 0.7029, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 6.7359493060169475e-06, + "loss": 0.7351, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 6.716978995499887e-06, + "loss": 0.7193, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 6.698031289111825e-06, + "loss": 0.6966, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 6.679106210278682e-06, + "loss": 0.7117, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 6.660203782398383e-06, + "loss": 0.7054, + "step": 21540 + }, + { + "epoch": 1.53, + "learning_rate": 6.641324028840865e-06, + "loss": 0.712, + "step": 21550 + }, + { + "epoch": 1.53, + "learning_rate": 6.622466972948016e-06, + "loss": 0.7014, + "step": 21560 + }, + { + "epoch": 1.53, + "learning_rate": 6.603632638033683e-06, + "loss": 0.7101, + "step": 21570 + }, + { + "epoch": 1.53, + "learning_rate": 6.584821047383594e-06, + "loss": 0.7027, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 6.566032224255389e-06, + "loss": 0.7388, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 6.547266191878529e-06, + "loss": 0.6844, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 6.528522973454315e-06, + "loss": 0.6999, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 6.509802592155851e-06, + "loss": 0.7233, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 6.491105071127984e-06, + "loss": 0.6955, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 6.4724304334873e-06, + "loss": 0.7329, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 6.453778702322114e-06, + "loss": 0.7384, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 6.435149900692411e-06, + "loss": 0.6645, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 6.416544051629819e-06, + "loss": 0.7142, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 6.397961178137584e-06, + "loss": 0.7009, + "step": 21680 + }, + { + "epoch": 1.54, + "learning_rate": 6.3794013031905685e-06, + "loss": 0.6876, + "step": 21690 + }, + { + "epoch": 1.54, + "learning_rate": 6.36086444973519e-06, + "loss": 0.7037, + "step": 21700 + }, + { + "epoch": 1.54, + "learning_rate": 6.342350640689393e-06, + "loss": 0.7337, + "step": 21710 + }, + { + "epoch": 1.54, + "learning_rate": 6.323859898942649e-06, + "loss": 0.7101, + "step": 21720 + }, + { + "epoch": 1.54, + "learning_rate": 6.305392247355893e-06, + "loss": 0.7238, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 6.2869477087615315e-06, + "loss": 0.7183, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 6.268526305963374e-06, + "loss": 0.6999, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 6.250128061736646e-06, + "loss": 0.697, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 6.231752998827925e-06, + "loss": 0.7193, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 6.213401139955144e-06, + "loss": 0.7374, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 6.195072507807529e-06, + "loss": 0.7121, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 6.17676712504561e-06, + "loss": 0.6946, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 6.1584850143011546e-06, + "loss": 0.7179, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 6.140226198177176e-06, + "loss": 0.6801, + "step": 21820 + }, + { + "epoch": 1.55, + "learning_rate": 6.121990699247865e-06, + "loss": 0.7136, + "step": 21830 + }, + { + "epoch": 1.55, + "learning_rate": 6.103778540058611e-06, + "loss": 0.7195, + "step": 21840 + }, + { + "epoch": 1.55, + "learning_rate": 6.085589743125919e-06, + "loss": 0.683, + "step": 21850 + }, + { + "epoch": 1.55, + "learning_rate": 6.067424330937438e-06, + "loss": 0.7171, + "step": 21860 + }, + { + "epoch": 1.55, + "learning_rate": 6.0492823259518795e-06, + "loss": 0.7437, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 6.0311637505990394e-06, + "loss": 0.6891, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 6.013068627279725e-06, + "loss": 0.7259, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.994996978365763e-06, + "loss": 0.7382, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.97694882619996e-06, + "loss": 0.7512, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.9589241930960635e-06, + "loss": 0.7028, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.940923101338733e-06, + "loss": 0.7125, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.922945573183544e-06, + "loss": 0.707, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.90499163085694e-06, + "loss": 0.706, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.887061296556179e-06, + "loss": 0.7613, + "step": 21960 + }, + { + "epoch": 1.56, + "learning_rate": 5.869154592449364e-06, + "loss": 0.751, + "step": 21970 + }, + { + "epoch": 1.56, + "learning_rate": 5.8512715406753486e-06, + "loss": 0.7164, + "step": 21980 + }, + { + "epoch": 1.56, + "learning_rate": 5.8334121633437794e-06, + "loss": 0.7117, + "step": 21990 + }, + { + "epoch": 1.56, + "learning_rate": 5.815576482534999e-06, + "loss": 0.7227, + "step": 22000 + }, + { + "epoch": 1.56, + "learning_rate": 5.797764520300083e-06, + "loss": 0.687, + "step": 22010 + }, + { + "epoch": 1.56, + "learning_rate": 5.7799762986607585e-06, + "loss": 0.6959, + "step": 22020 + }, + { + "epoch": 1.56, + "learning_rate": 5.762211839609424e-06, + "loss": 0.6949, + "step": 22030 + }, + { + "epoch": 1.56, + "learning_rate": 5.744471165109069e-06, + "loss": 0.7237, + "step": 22040 + }, + { + "epoch": 1.56, + "learning_rate": 5.726754297093315e-06, + "loss": 0.718, + "step": 22050 + }, + { + "epoch": 1.56, + "learning_rate": 5.709061257466314e-06, + "loss": 0.7166, + "step": 22060 + }, + { + "epoch": 1.56, + "learning_rate": 5.691392068102786e-06, + "loss": 0.6881, + "step": 22070 + }, + { + "epoch": 1.56, + "learning_rate": 5.673746750847938e-06, + "loss": 0.7015, + "step": 22080 + }, + { + "epoch": 1.56, + "learning_rate": 5.656125327517495e-06, + "loss": 0.7148, + "step": 22090 + }, + { + "epoch": 1.56, + "learning_rate": 5.638527819897607e-06, + "loss": 0.7374, + "step": 22100 + }, + { + "epoch": 1.57, + "learning_rate": 5.620954249744884e-06, + "loss": 0.6898, + "step": 22110 + }, + { + "epoch": 1.57, + "learning_rate": 5.6034046387863165e-06, + "loss": 0.7184, + "step": 22120 + }, + { + "epoch": 1.57, + "learning_rate": 5.585879008719297e-06, + "loss": 0.7096, + "step": 22130 + }, + { + "epoch": 1.57, + "learning_rate": 5.568377381211548e-06, + "loss": 0.6917, + "step": 22140 + }, + { + "epoch": 1.57, + "learning_rate": 5.550899777901136e-06, + "loss": 0.7112, + "step": 22150 + }, + { + "epoch": 1.57, + "learning_rate": 5.533446220396404e-06, + "loss": 0.7252, + "step": 22160 + }, + { + "epoch": 1.57, + "learning_rate": 5.5160167302759884e-06, + "loss": 0.664, + "step": 22170 + }, + { + "epoch": 1.57, + "learning_rate": 5.498611329088751e-06, + "loss": 0.7099, + "step": 22180 + }, + { + "epoch": 1.57, + "learning_rate": 5.481230038353782e-06, + "loss": 0.7, + "step": 22190 + }, + { + "epoch": 1.57, + "learning_rate": 5.463872879560366e-06, + "loss": 0.7235, + "step": 22200 + }, + { + "epoch": 1.57, + "learning_rate": 5.4465398741679386e-06, + "loss": 0.6844, + "step": 22210 + }, + { + "epoch": 1.57, + "learning_rate": 5.42923104360609e-06, + "loss": 0.7504, + "step": 22220 + }, + { + "epoch": 1.57, + "learning_rate": 5.411946409274501e-06, + "loss": 0.6676, + "step": 22230 + }, + { + "epoch": 1.57, + "learning_rate": 5.394685992542964e-06, + "loss": 0.7014, + "step": 22240 + }, + { + "epoch": 1.57, + "learning_rate": 5.377449814751304e-06, + "loss": 0.7109, + "step": 22250 + }, + { + "epoch": 1.58, + "learning_rate": 5.3602378972094e-06, + "loss": 0.7328, + "step": 22260 + }, + { + "epoch": 1.58, + "learning_rate": 5.343050261197116e-06, + "loss": 0.6915, + "step": 22270 + }, + { + "epoch": 1.58, + "learning_rate": 5.325886927964319e-06, + "loss": 0.6845, + "step": 22280 + }, + { + "epoch": 1.58, + "learning_rate": 5.308747918730806e-06, + "loss": 0.7038, + "step": 22290 + }, + { + "epoch": 1.58, + "learning_rate": 5.29163325468632e-06, + "loss": 0.6908, + "step": 22300 + }, + { + "epoch": 1.58, + "learning_rate": 5.274542956990491e-06, + "loss": 0.7001, + "step": 22310 + }, + { + "epoch": 1.58, + "learning_rate": 5.257477046772844e-06, + "loss": 0.7159, + "step": 22320 + }, + { + "epoch": 1.58, + "learning_rate": 5.240435545132716e-06, + "loss": 0.705, + "step": 22330 + }, + { + "epoch": 1.58, + "learning_rate": 5.22341847313931e-06, + "loss": 0.6825, + "step": 22340 + }, + { + "epoch": 1.58, + "learning_rate": 5.206425851831592e-06, + "loss": 0.7245, + "step": 22350 + }, + { + "epoch": 1.58, + "learning_rate": 5.18945770221832e-06, + "loss": 0.7323, + "step": 22360 + }, + { + "epoch": 1.58, + "learning_rate": 5.172514045277979e-06, + "loss": 0.7015, + "step": 22370 + }, + { + "epoch": 1.58, + "learning_rate": 5.155594901958791e-06, + "loss": 0.7121, + "step": 22380 + }, + { + "epoch": 1.58, + "learning_rate": 5.13870029317865e-06, + "loss": 0.7172, + "step": 22390 + }, + { + "epoch": 1.59, + "learning_rate": 5.12183023982514e-06, + "loss": 0.7217, + "step": 22400 + }, + { + "epoch": 1.59, + "learning_rate": 5.1049847627554634e-06, + "loss": 0.6898, + "step": 22410 + }, + { + "epoch": 1.59, + "learning_rate": 5.088163882796448e-06, + "loss": 0.699, + "step": 22420 + }, + { + "epoch": 1.59, + "learning_rate": 5.071367620744527e-06, + "loss": 0.7336, + "step": 22430 + }, + { + "epoch": 1.59, + "learning_rate": 5.054595997365671e-06, + "loss": 0.7309, + "step": 22440 + }, + { + "epoch": 1.59, + "learning_rate": 5.037849033395392e-06, + "loss": 0.6978, + "step": 22450 + }, + { + "epoch": 1.59, + "learning_rate": 5.0211267495387295e-06, + "loss": 0.7039, + "step": 22460 + }, + { + "epoch": 1.59, + "learning_rate": 5.004429166470209e-06, + "loss": 0.7153, + "step": 22470 + }, + { + "epoch": 1.59, + "learning_rate": 4.987756304833796e-06, + "loss": 0.6851, + "step": 22480 + }, + { + "epoch": 1.59, + "learning_rate": 4.972771883223115e-06, + "loss": 0.7255, + "step": 22490 + }, + { + "epoch": 1.59, + "learning_rate": 4.956146049072402e-06, + "loss": 0.7188, + "step": 22500 + }, + { + "epoch": 1.59, + "learning_rate": 4.939544996048415e-06, + "loss": 0.7236, + "step": 22510 + }, + { + "epoch": 1.59, + "learning_rate": 4.922968744675788e-06, + "loss": 0.7312, + "step": 22520 + }, + { + "epoch": 1.59, + "learning_rate": 4.9064173154485086e-06, + "loss": 0.7279, + "step": 22530 + }, + { + "epoch": 1.6, + "learning_rate": 4.889890728829832e-06, + "loss": 0.6995, + "step": 22540 + }, + { + "epoch": 1.6, + "learning_rate": 4.8733890052523434e-06, + "loss": 0.7013, + "step": 22550 + }, + { + "epoch": 1.6, + "learning_rate": 4.856912165117871e-06, + "loss": 0.6899, + "step": 22560 + }, + { + "epoch": 1.6, + "learning_rate": 4.840460228797489e-06, + "loss": 0.698, + "step": 22570 + }, + { + "epoch": 1.6, + "learning_rate": 4.824033216631463e-06, + "loss": 0.7089, + "step": 22580 + }, + { + "epoch": 1.6, + "learning_rate": 4.807631148929248e-06, + "loss": 0.718, + "step": 22590 + }, + { + "epoch": 1.6, + "learning_rate": 4.791254045969476e-06, + "loss": 0.7047, + "step": 22600 + }, + { + "epoch": 1.6, + "learning_rate": 4.774901927999906e-06, + "loss": 0.7076, + "step": 22610 + }, + { + "epoch": 1.6, + "learning_rate": 4.758574815237396e-06, + "loss": 0.7187, + "step": 22620 + }, + { + "epoch": 1.6, + "learning_rate": 4.742272727867894e-06, + "loss": 0.7161, + "step": 22630 + }, + { + "epoch": 1.6, + "learning_rate": 4.7259956860464165e-06, + "loss": 0.7227, + "step": 22640 + }, + { + "epoch": 1.6, + "learning_rate": 4.711367778983819e-06, + "loss": 0.7202, + "step": 22650 + }, + { + "epoch": 1.6, + "learning_rate": 4.695138379119721e-06, + "loss": 0.7038, + "step": 22660 + }, + { + "epoch": 1.6, + "learning_rate": 4.678934083077979e-06, + "loss": 0.7102, + "step": 22670 + }, + { + "epoch": 1.61, + "learning_rate": 4.662754910892711e-06, + "loss": 0.6974, + "step": 22680 + }, + { + "epoch": 1.61, + "learning_rate": 4.646600882566954e-06, + "loss": 0.6962, + "step": 22690 + }, + { + "epoch": 1.61, + "learning_rate": 4.630472018072659e-06, + "loss": 0.6789, + "step": 22700 + }, + { + "epoch": 1.61, + "learning_rate": 4.614368337350686e-06, + "loss": 0.7192, + "step": 22710 + }, + { + "epoch": 1.61, + "learning_rate": 4.598289860310745e-06, + "loss": 0.6817, + "step": 22720 + }, + { + "epoch": 1.61, + "learning_rate": 4.582236606831378e-06, + "loss": 0.7246, + "step": 22730 + }, + { + "epoch": 1.61, + "learning_rate": 4.566208596759963e-06, + "loss": 0.7084, + "step": 22740 + }, + { + "epoch": 1.61, + "learning_rate": 4.550205849912648e-06, + "loss": 0.691, + "step": 22750 + }, + { + "epoch": 1.61, + "learning_rate": 4.534228386074363e-06, + "loss": 0.7319, + "step": 22760 + }, + { + "epoch": 1.61, + "learning_rate": 4.51827622499876e-06, + "loss": 0.7048, + "step": 22770 + }, + { + "epoch": 1.61, + "learning_rate": 4.502349386408236e-06, + "loss": 0.7237, + "step": 22780 + }, + { + "epoch": 1.61, + "learning_rate": 4.48644788999385e-06, + "loss": 0.6948, + "step": 22790 + }, + { + "epoch": 1.61, + "learning_rate": 4.470571755415354e-06, + "loss": 0.7186, + "step": 22800 + }, + { + "epoch": 1.61, + "learning_rate": 4.454721002301127e-06, + "loss": 0.7407, + "step": 22810 + }, + { + "epoch": 1.62, + "learning_rate": 4.438895650248184e-06, + "loss": 0.7064, + "step": 22820 + }, + { + "epoch": 1.62, + "learning_rate": 4.423095718822112e-06, + "loss": 0.6924, + "step": 22830 + }, + { + "epoch": 1.62, + "learning_rate": 4.4073212275570954e-06, + "loss": 0.7243, + "step": 22840 + }, + { + "epoch": 1.62, + "learning_rate": 4.3915721959558534e-06, + "loss": 0.7193, + "step": 22850 + }, + { + "epoch": 1.62, + "learning_rate": 4.37584864348963e-06, + "loss": 0.7117, + "step": 22860 + }, + { + "epoch": 1.62, + "learning_rate": 4.360150589598156e-06, + "loss": 0.692, + "step": 22870 + }, + { + "epoch": 1.62, + "learning_rate": 4.344478053689652e-06, + "loss": 0.7245, + "step": 22880 + }, + { + "epoch": 1.62, + "learning_rate": 4.328831055140798e-06, + "loss": 0.7022, + "step": 22890 + }, + { + "epoch": 1.62, + "learning_rate": 4.313209613296679e-06, + "loss": 0.7265, + "step": 22900 + }, + { + "epoch": 1.62, + "learning_rate": 4.297613747470789e-06, + "loss": 0.7039, + "step": 22910 + }, + { + "epoch": 1.62, + "learning_rate": 4.282043476945008e-06, + "loss": 0.6811, + "step": 22920 + }, + { + "epoch": 1.62, + "learning_rate": 4.2664988209695775e-06, + "loss": 0.6649, + "step": 22930 + }, + { + "epoch": 1.62, + "learning_rate": 4.250979798763052e-06, + "loss": 0.6998, + "step": 22940 + }, + { + "epoch": 1.62, + "learning_rate": 4.2354864295123e-06, + "loss": 0.7433, + "step": 22950 + }, + { + "epoch": 1.63, + "learning_rate": 4.220018732372485e-06, + "loss": 0.7184, + "step": 22960 + }, + { + "epoch": 1.63, + "learning_rate": 4.204576726467027e-06, + "loss": 0.7101, + "step": 22970 + }, + { + "epoch": 1.63, + "learning_rate": 4.1891604308875706e-06, + "loss": 0.7722, + "step": 22980 + }, + { + "epoch": 1.63, + "learning_rate": 4.17376986469398e-06, + "loss": 0.7269, + "step": 22990 + }, + { + "epoch": 1.63, + "learning_rate": 4.158405046914315e-06, + "loss": 0.6903, + "step": 23000 + }, + { + "epoch": 1.63, + "learning_rate": 4.143065996544804e-06, + "loss": 0.7359, + "step": 23010 + }, + { + "epoch": 1.63, + "learning_rate": 4.1277527325498e-06, + "loss": 0.6894, + "step": 23020 + }, + { + "epoch": 1.63, + "learning_rate": 4.112465273861799e-06, + "loss": 0.7237, + "step": 23030 + }, + { + "epoch": 1.63, + "learning_rate": 4.097203639381364e-06, + "loss": 0.7028, + "step": 23040 + }, + { + "epoch": 1.63, + "learning_rate": 4.081967847977164e-06, + "loss": 0.7038, + "step": 23050 + }, + { + "epoch": 1.63, + "learning_rate": 4.066757918485886e-06, + "loss": 0.711, + "step": 23060 + }, + { + "epoch": 1.63, + "learning_rate": 4.0515738697122694e-06, + "loss": 0.717, + "step": 23070 + }, + { + "epoch": 1.63, + "learning_rate": 4.036415720429027e-06, + "loss": 0.7134, + "step": 23080 + }, + { + "epoch": 1.63, + "learning_rate": 4.021283489376876e-06, + "loss": 0.709, + "step": 23090 + }, + { + "epoch": 1.64, + "learning_rate": 4.006177195264488e-06, + "loss": 0.7266, + "step": 23100 + }, + { + "epoch": 1.64, + "learning_rate": 3.9910968567684506e-06, + "loss": 0.6872, + "step": 23110 + }, + { + "epoch": 1.64, + "learning_rate": 3.976042492533269e-06, + "loss": 0.7256, + "step": 23120 + }, + { + "epoch": 1.64, + "learning_rate": 3.961014121171342e-06, + "loss": 0.7437, + "step": 23130 + }, + { + "epoch": 1.64, + "learning_rate": 3.946011761262932e-06, + "loss": 0.7111, + "step": 23140 + }, + { + "epoch": 1.64, + "learning_rate": 3.931035431356134e-06, + "loss": 0.697, + "step": 23150 + }, + { + "epoch": 1.64, + "learning_rate": 3.916085149966856e-06, + "loss": 0.7258, + "step": 23160 + }, + { + "epoch": 1.64, + "learning_rate": 3.901160935578815e-06, + "loss": 0.7029, + "step": 23170 + }, + { + "epoch": 1.64, + "learning_rate": 3.8862628066435065e-06, + "loss": 0.686, + "step": 23180 + }, + { + "epoch": 1.64, + "learning_rate": 3.8713907815801534e-06, + "loss": 0.6994, + "step": 23190 + }, + { + "epoch": 1.64, + "learning_rate": 3.856544878775708e-06, + "loss": 0.7039, + "step": 23200 + }, + { + "epoch": 1.64, + "learning_rate": 3.841725116584846e-06, + "loss": 0.7096, + "step": 23210 + }, + { + "epoch": 1.64, + "learning_rate": 3.8269315133299126e-06, + "loss": 0.7029, + "step": 23220 + }, + { + "epoch": 1.64, + "learning_rate": 3.8121640873009067e-06, + "loss": 0.7133, + "step": 23230 + }, + { + "epoch": 1.64, + "learning_rate": 3.7974228567554617e-06, + "loss": 0.7054, + "step": 23240 + }, + { + "epoch": 1.65, + "learning_rate": 3.7827078399188393e-06, + "loss": 0.7077, + "step": 23250 + }, + { + "epoch": 1.65, + "learning_rate": 3.7680190549838847e-06, + "loss": 0.6985, + "step": 23260 + }, + { + "epoch": 1.65, + "learning_rate": 3.753356520111004e-06, + "loss": 0.7222, + "step": 23270 + }, + { + "epoch": 1.65, + "learning_rate": 3.738720253428152e-06, + "loss": 0.7174, + "step": 23280 + }, + { + "epoch": 1.65, + "learning_rate": 3.724110273030812e-06, + "loss": 0.6935, + "step": 23290 + }, + { + "epoch": 1.65, + "learning_rate": 3.709526596981977e-06, + "loss": 0.7094, + "step": 23300 + }, + { + "epoch": 1.65, + "learning_rate": 3.6949692433120907e-06, + "loss": 0.7029, + "step": 23310 + }, + { + "epoch": 1.65, + "learning_rate": 3.6804382300190844e-06, + "loss": 0.7145, + "step": 23320 + }, + { + "epoch": 1.65, + "learning_rate": 3.665933575068298e-06, + "loss": 0.7247, + "step": 23330 + }, + { + "epoch": 1.65, + "learning_rate": 3.6514552963925004e-06, + "loss": 0.7393, + "step": 23340 + }, + { + "epoch": 1.65, + "learning_rate": 3.637003411891854e-06, + "loss": 0.7352, + "step": 23350 + }, + { + "epoch": 1.65, + "learning_rate": 3.622577939433866e-06, + "loss": 0.6873, + "step": 23360 + }, + { + "epoch": 1.65, + "learning_rate": 3.6081788968534066e-06, + "loss": 0.7056, + "step": 23370 + }, + { + "epoch": 1.65, + "learning_rate": 3.5938063019526653e-06, + "loss": 0.7287, + "step": 23380 + }, + { + "epoch": 1.66, + "learning_rate": 3.579460172501142e-06, + "loss": 0.717, + "step": 23390 + }, + { + "epoch": 1.66, + "learning_rate": 3.5651405262356024e-06, + "loss": 0.7258, + "step": 23400 + }, + { + "epoch": 1.66, + "learning_rate": 3.5508473808600674e-06, + "loss": 0.6985, + "step": 23410 + }, + { + "epoch": 1.66, + "learning_rate": 3.5365807540458097e-06, + "loss": 0.7059, + "step": 23420 + }, + { + "epoch": 1.66, + "learning_rate": 3.522340663431314e-06, + "loss": 0.7047, + "step": 23430 + }, + { + "epoch": 1.66, + "learning_rate": 3.5081271266222434e-06, + "loss": 0.7064, + "step": 23440 + }, + { + "epoch": 1.66, + "learning_rate": 3.4939401611914337e-06, + "loss": 0.6804, + "step": 23450 + }, + { + "epoch": 1.66, + "learning_rate": 3.479779784678877e-06, + "loss": 0.7099, + "step": 23460 + }, + { + "epoch": 1.66, + "learning_rate": 3.465646014591703e-06, + "loss": 0.7182, + "step": 23470 + }, + { + "epoch": 1.66, + "learning_rate": 3.4515388684041193e-06, + "loss": 0.6964, + "step": 23480 + }, + { + "epoch": 1.66, + "learning_rate": 3.437458363557433e-06, + "loss": 0.707, + "step": 23490 + }, + { + "epoch": 1.66, + "learning_rate": 3.4234045174600103e-06, + "loss": 0.6729, + "step": 23500 + }, + { + "epoch": 1.66, + "learning_rate": 3.409377347487272e-06, + "loss": 0.6822, + "step": 23510 + }, + { + "epoch": 1.66, + "learning_rate": 3.395376870981634e-06, + "loss": 0.7339, + "step": 23520 + }, + { + "epoch": 1.67, + "learning_rate": 3.3814031052525175e-06, + "loss": 0.7011, + "step": 23530 + }, + { + "epoch": 1.67, + "learning_rate": 3.367456067576327e-06, + "loss": 0.7216, + "step": 23540 + }, + { + "epoch": 1.67, + "learning_rate": 3.353535775196423e-06, + "loss": 0.7194, + "step": 23550 + }, + { + "epoch": 1.67, + "learning_rate": 3.339642245323102e-06, + "loss": 0.7163, + "step": 23560 + }, + { + "epoch": 1.67, + "learning_rate": 3.325775495133546e-06, + "loss": 0.736, + "step": 23570 + }, + { + "epoch": 1.67, + "learning_rate": 3.31193554177186e-06, + "loss": 0.7071, + "step": 23580 + }, + { + "epoch": 1.67, + "learning_rate": 3.298122402349002e-06, + "loss": 0.6889, + "step": 23590 + }, + { + "epoch": 1.67, + "learning_rate": 3.2843360939427943e-06, + "loss": 0.6933, + "step": 23600 + }, + { + "epoch": 1.67, + "learning_rate": 3.270576633597866e-06, + "loss": 0.699, + "step": 23610 + }, + { + "epoch": 1.67, + "learning_rate": 3.2568440383256598e-06, + "loss": 0.6603, + "step": 23620 + }, + { + "epoch": 1.67, + "learning_rate": 3.243138325104411e-06, + "loss": 0.6971, + "step": 23630 + }, + { + "epoch": 1.67, + "learning_rate": 3.230826181364585e-06, + "loss": 0.7269, + "step": 23640 + }, + { + "epoch": 1.67, + "learning_rate": 3.217171590696141e-06, + "loss": 0.7277, + "step": 23650 + }, + { + "epoch": 1.67, + "learning_rate": 3.2035439311275006e-06, + "loss": 0.6943, + "step": 23660 + }, + { + "epoch": 1.68, + "learning_rate": 3.1899432195071575e-06, + "loss": 0.7204, + "step": 23670 + }, + { + "epoch": 1.68, + "learning_rate": 3.176369472650292e-06, + "loss": 0.7241, + "step": 23680 + }, + { + "epoch": 1.68, + "learning_rate": 3.1628227073387474e-06, + "loss": 0.6712, + "step": 23690 + }, + { + "epoch": 1.68, + "learning_rate": 3.1493029403209973e-06, + "loss": 0.6877, + "step": 23700 + }, + { + "epoch": 1.68, + "learning_rate": 3.1358101883121547e-06, + "loss": 0.6953, + "step": 23710 + }, + { + "epoch": 1.68, + "learning_rate": 3.122344467993907e-06, + "loss": 0.6918, + "step": 23720 + }, + { + "epoch": 1.68, + "learning_rate": 3.1089057960145498e-06, + "loss": 0.6866, + "step": 23730 + }, + { + "epoch": 1.68, + "learning_rate": 3.0954941889889096e-06, + "loss": 0.6975, + "step": 23740 + }, + { + "epoch": 1.68, + "learning_rate": 3.082109663498378e-06, + "loss": 0.7213, + "step": 23750 + }, + { + "epoch": 1.68, + "learning_rate": 3.068752236090841e-06, + "loss": 0.7225, + "step": 23760 + }, + { + "epoch": 1.68, + "learning_rate": 3.055421923280702e-06, + "loss": 0.7064, + "step": 23770 + }, + { + "epoch": 1.68, + "learning_rate": 3.0421187415488246e-06, + "loss": 0.696, + "step": 23780 + }, + { + "epoch": 1.68, + "learning_rate": 3.028842707342541e-06, + "loss": 0.7251, + "step": 23790 + }, + { + "epoch": 1.68, + "learning_rate": 3.0155938370756116e-06, + "loss": 0.7075, + "step": 23800 + }, + { + "epoch": 1.69, + "learning_rate": 3.0023721471282214e-06, + "loss": 0.7181, + "step": 23810 + }, + { + "epoch": 1.69, + "learning_rate": 2.9891776538469362e-06, + "loss": 0.6982, + "step": 23820 + }, + { + "epoch": 1.69, + "learning_rate": 2.9760103735447186e-06, + "loss": 0.6984, + "step": 23830 + }, + { + "epoch": 1.69, + "learning_rate": 2.962870322500866e-06, + "loss": 0.7363, + "step": 23840 + }, + { + "epoch": 1.69, + "learning_rate": 2.9497575169610243e-06, + "loss": 0.6743, + "step": 23850 + }, + { + "epoch": 1.69, + "learning_rate": 2.9366719731371563e-06, + "loss": 0.7141, + "step": 23860 + }, + { + "epoch": 1.69, + "learning_rate": 2.9236137072075067e-06, + "loss": 0.7228, + "step": 23870 + }, + { + "epoch": 1.69, + "learning_rate": 2.910582735316597e-06, + "loss": 0.7028, + "step": 23880 + }, + { + "epoch": 1.69, + "learning_rate": 2.8975790735752186e-06, + "loss": 0.7098, + "step": 23890 + }, + { + "epoch": 1.69, + "learning_rate": 2.8846027380603908e-06, + "loss": 0.6907, + "step": 23900 + }, + { + "epoch": 1.69, + "learning_rate": 2.8716537448153364e-06, + "loss": 0.7226, + "step": 23910 + }, + { + "epoch": 1.69, + "learning_rate": 2.8587321098494963e-06, + "loss": 0.7298, + "step": 23920 + }, + { + "epoch": 1.69, + "learning_rate": 2.8458378491384606e-06, + "loss": 0.7172, + "step": 23930 + }, + { + "epoch": 1.69, + "learning_rate": 2.832970978624e-06, + "loss": 0.7065, + "step": 23940 + }, + { + "epoch": 1.7, + "learning_rate": 2.8201315142140055e-06, + "loss": 0.6787, + "step": 23950 + }, + { + "epoch": 1.7, + "learning_rate": 2.8073194717824935e-06, + "loss": 0.6846, + "step": 23960 + }, + { + "epoch": 1.7, + "learning_rate": 2.794534867169568e-06, + "loss": 0.7014, + "step": 23970 + }, + { + "epoch": 1.7, + "learning_rate": 2.7817777161814245e-06, + "loss": 0.721, + "step": 23980 + }, + { + "epoch": 1.7, + "learning_rate": 2.769048034590299e-06, + "loss": 0.7459, + "step": 23990 + }, + { + "epoch": 1.7, + "learning_rate": 2.7563458381344858e-06, + "loss": 0.6801, + "step": 24000 + }, + { + "epoch": 1.7, + "learning_rate": 2.7436711425182726e-06, + "loss": 0.7142, + "step": 24010 + }, + { + "epoch": 1.7, + "learning_rate": 2.7310239634119744e-06, + "loss": 0.6988, + "step": 24020 + }, + { + "epoch": 1.7, + "learning_rate": 2.718404316451864e-06, + "loss": 0.7297, + "step": 24030 + }, + { + "epoch": 1.7, + "learning_rate": 2.7058122172401916e-06, + "loss": 0.672, + "step": 24040 + }, + { + "epoch": 1.7, + "learning_rate": 2.693247681345132e-06, + "loss": 0.7278, + "step": 24050 + }, + { + "epoch": 1.7, + "learning_rate": 2.680710724300803e-06, + "loss": 0.7055, + "step": 24060 + }, + { + "epoch": 1.7, + "learning_rate": 2.6682013616072005e-06, + "loss": 0.7266, + "step": 24070 + }, + { + "epoch": 1.7, + "learning_rate": 2.655719608730231e-06, + "loss": 0.7121, + "step": 24080 + }, + { + "epoch": 1.71, + "learning_rate": 2.6432654811016395e-06, + "loss": 0.6943, + "step": 24090 + }, + { + "epoch": 1.71, + "learning_rate": 2.630838994119042e-06, + "loss": 0.709, + "step": 24100 + }, + { + "epoch": 1.71, + "learning_rate": 2.618440163145855e-06, + "loss": 0.7115, + "step": 24110 + }, + { + "epoch": 1.71, + "learning_rate": 2.60606900351133e-06, + "loss": 0.7056, + "step": 24120 + }, + { + "epoch": 1.71, + "learning_rate": 2.5937255305104825e-06, + "loss": 0.7071, + "step": 24130 + }, + { + "epoch": 1.71, + "learning_rate": 2.581409759404113e-06, + "loss": 0.6953, + "step": 24140 + }, + { + "epoch": 1.71, + "learning_rate": 2.5691217054187726e-06, + "loss": 0.7178, + "step": 24150 + }, + { + "epoch": 1.71, + "learning_rate": 2.556861383746731e-06, + "loss": 0.7034, + "step": 24160 + }, + { + "epoch": 1.71, + "learning_rate": 2.54462880954599e-06, + "loss": 0.7028, + "step": 24170 + }, + { + "epoch": 1.71, + "learning_rate": 2.532423997940231e-06, + "loss": 0.6898, + "step": 24180 + }, + { + "epoch": 1.71, + "learning_rate": 2.5202469640188187e-06, + "loss": 0.7106, + "step": 24190 + }, + { + "epoch": 1.71, + "learning_rate": 2.508097722836769e-06, + "loss": 0.7149, + "step": 24200 + }, + { + "epoch": 1.71, + "learning_rate": 2.4959762894147503e-06, + "loss": 0.7059, + "step": 24210 + }, + { + "epoch": 1.71, + "learning_rate": 2.48388267873903e-06, + "loss": 0.6858, + "step": 24220 + }, + { + "epoch": 1.72, + "learning_rate": 2.4718169057614953e-06, + "loss": 0.6949, + "step": 24230 + }, + { + "epoch": 1.72, + "learning_rate": 2.4597789853996022e-06, + "loss": 0.7374, + "step": 24240 + }, + { + "epoch": 1.72, + "learning_rate": 2.4477689325363875e-06, + "loss": 0.6925, + "step": 24250 + }, + { + "epoch": 1.72, + "learning_rate": 2.4357867620204174e-06, + "loss": 0.7254, + "step": 24260 + }, + { + "epoch": 1.72, + "learning_rate": 2.4238324886657976e-06, + "loss": 0.7108, + "step": 24270 + }, + { + "epoch": 1.72, + "learning_rate": 2.411906127252134e-06, + "loss": 0.7205, + "step": 24280 + }, + { + "epoch": 1.72, + "learning_rate": 2.4000076925245364e-06, + "loss": 0.7125, + "step": 24290 + }, + { + "epoch": 1.72, + "learning_rate": 2.388137199193571e-06, + "loss": 0.7166, + "step": 24300 + }, + { + "epoch": 1.72, + "learning_rate": 2.3762946619352773e-06, + "loss": 0.7536, + "step": 24310 + }, + { + "epoch": 1.72, + "learning_rate": 2.364480095391114e-06, + "loss": 0.6741, + "step": 24320 + }, + { + "epoch": 1.72, + "learning_rate": 2.3526935141679695e-06, + "loss": 0.7399, + "step": 24330 + }, + { + "epoch": 1.72, + "learning_rate": 2.3409349328381264e-06, + "loss": 0.6885, + "step": 24340 + }, + { + "epoch": 1.72, + "learning_rate": 2.329204365939261e-06, + "loss": 0.6654, + "step": 24350 + }, + { + "epoch": 1.72, + "learning_rate": 2.3175018279743943e-06, + "loss": 0.7019, + "step": 24360 + }, + { + "epoch": 1.72, + "learning_rate": 2.305827333411914e-06, + "loss": 0.6802, + "step": 24370 + }, + { + "epoch": 1.73, + "learning_rate": 2.2941808966855217e-06, + "loss": 0.6845, + "step": 24380 + }, + { + "epoch": 1.73, + "learning_rate": 2.2825625321942433e-06, + "loss": 0.7087, + "step": 24390 + }, + { + "epoch": 1.73, + "learning_rate": 2.270972254302381e-06, + "loss": 0.6803, + "step": 24400 + }, + { + "epoch": 1.73, + "learning_rate": 2.259410077339527e-06, + "loss": 0.7212, + "step": 24410 + }, + { + "epoch": 1.73, + "learning_rate": 2.2478760156005278e-06, + "loss": 0.7025, + "step": 24420 + }, + { + "epoch": 1.73, + "learning_rate": 2.2363700833454627e-06, + "loss": 0.6774, + "step": 24430 + }, + { + "epoch": 1.73, + "learning_rate": 2.2248922947996464e-06, + "loss": 0.7134, + "step": 24440 + }, + { + "epoch": 1.73, + "learning_rate": 2.2134426641535756e-06, + "loss": 0.71, + "step": 24450 + }, + { + "epoch": 1.73, + "learning_rate": 2.202021205562965e-06, + "loss": 0.7253, + "step": 24460 + }, + { + "epoch": 1.73, + "learning_rate": 2.19062793314867e-06, + "loss": 0.7008, + "step": 24470 + }, + { + "epoch": 1.73, + "learning_rate": 2.179262860996717e-06, + "loss": 0.7299, + "step": 24480 + }, + { + "epoch": 1.73, + "learning_rate": 2.1679260031582575e-06, + "loss": 0.7259, + "step": 24490 + }, + { + "epoch": 1.73, + "learning_rate": 2.1566173736495663e-06, + "loss": 0.6953, + "step": 24500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1453369864520125e-06, + "loss": 0.7024, + "step": 24510 + }, + { + "epoch": 1.74, + "learning_rate": 2.134084855512056e-06, + "loss": 0.7059, + "step": 24520 + }, + { + "epoch": 1.74, + "learning_rate": 2.1228609947412074e-06, + "loss": 0.6975, + "step": 24530 + }, + { + "epoch": 1.74, + "learning_rate": 2.111665418016051e-06, + "loss": 0.7137, + "step": 24540 + }, + { + "epoch": 1.74, + "learning_rate": 2.1004981391781697e-06, + "loss": 0.7219, + "step": 24550 + }, + { + "epoch": 1.74, + "learning_rate": 2.0893591720341888e-06, + "loss": 0.7267, + "step": 24560 + }, + { + "epoch": 1.74, + "learning_rate": 2.0782485303557144e-06, + "loss": 0.7114, + "step": 24570 + }, + { + "epoch": 1.74, + "learning_rate": 2.0671662278793387e-06, + "loss": 0.7075, + "step": 24580 + }, + { + "epoch": 1.74, + "learning_rate": 2.0561122783066128e-06, + "loss": 0.6794, + "step": 24590 + }, + { + "epoch": 1.74, + "learning_rate": 2.0450866953040436e-06, + "loss": 0.7195, + "step": 24600 + }, + { + "epoch": 1.74, + "learning_rate": 2.034089492503052e-06, + "loss": 0.7097, + "step": 24610 + }, + { + "epoch": 1.74, + "learning_rate": 2.0231206834999793e-06, + "loss": 0.7114, + "step": 24620 + }, + { + "epoch": 1.74, + "learning_rate": 2.0121802818560716e-06, + "loss": 0.7295, + "step": 24630 + }, + { + "epoch": 1.74, + "learning_rate": 2.0012683010974366e-06, + "loss": 0.7161, + "step": 24640 + }, + { + "epoch": 1.74, + "learning_rate": 1.9903847547150495e-06, + "loss": 0.6945, + "step": 24650 + }, + { + "epoch": 1.75, + "learning_rate": 1.979529656164736e-06, + "loss": 0.7025, + "step": 24660 + }, + { + "epoch": 1.75, + "learning_rate": 1.9687030188671517e-06, + "loss": 0.7047, + "step": 24670 + }, + { + "epoch": 1.75, + "learning_rate": 1.957904856207754e-06, + "loss": 0.6837, + "step": 24680 + }, + { + "epoch": 1.75, + "learning_rate": 1.9471351815367995e-06, + "loss": 0.7309, + "step": 24690 + }, + { + "epoch": 1.75, + "learning_rate": 1.9363940081693287e-06, + "loss": 0.7112, + "step": 24700 + }, + { + "epoch": 1.75, + "learning_rate": 1.92568134938515e-06, + "loss": 0.698, + "step": 24710 + }, + { + "epoch": 1.75, + "learning_rate": 1.914997218428796e-06, + "loss": 0.7365, + "step": 24720 + }, + { + "epoch": 1.75, + "learning_rate": 1.9043416285095568e-06, + "loss": 0.6988, + "step": 24730 + }, + { + "epoch": 1.75, + "learning_rate": 1.8937145928014073e-06, + "loss": 0.7022, + "step": 24740 + }, + { + "epoch": 1.75, + "learning_rate": 1.883116124443049e-06, + "loss": 0.7207, + "step": 24750 + }, + { + "epoch": 1.75, + "learning_rate": 1.8725462365378366e-06, + "loss": 0.7202, + "step": 24760 + }, + { + "epoch": 1.75, + "learning_rate": 1.862004942153811e-06, + "loss": 0.7263, + "step": 24770 + }, + { + "epoch": 1.75, + "learning_rate": 1.85149225432365e-06, + "loss": 0.7123, + "step": 24780 + }, + { + "epoch": 1.75, + "learning_rate": 1.8410081860446682e-06, + "loss": 0.7025, + "step": 24790 + }, + { + "epoch": 1.76, + "learning_rate": 1.8305527502787912e-06, + "loss": 0.7225, + "step": 24800 + }, + { + "epoch": 1.76, + "learning_rate": 1.8201259599525567e-06, + "loss": 0.6956, + "step": 24810 + }, + { + "epoch": 1.76, + "learning_rate": 1.8097278279570696e-06, + "loss": 0.716, + "step": 24820 + }, + { + "epoch": 1.76, + "learning_rate": 1.7993583671480213e-06, + "loss": 0.6895, + "step": 24830 + }, + { + "epoch": 1.76, + "learning_rate": 1.7890175903456403e-06, + "loss": 0.7089, + "step": 24840 + }, + { + "epoch": 1.76, + "learning_rate": 1.778705510334705e-06, + "loss": 0.7165, + "step": 24850 + }, + { + "epoch": 1.76, + "learning_rate": 1.7684221398645007e-06, + "loss": 0.6951, + "step": 24860 + }, + { + "epoch": 1.76, + "learning_rate": 1.7581674916488267e-06, + "loss": 0.6925, + "step": 24870 + }, + { + "epoch": 1.76, + "learning_rate": 1.7479415783659774e-06, + "loss": 0.724, + "step": 24880 + }, + { + "epoch": 1.76, + "learning_rate": 1.7377444126587094e-06, + "loss": 0.6585, + "step": 24890 + }, + { + "epoch": 1.76, + "learning_rate": 1.7275760071342379e-06, + "loss": 0.7301, + "step": 24900 + }, + { + "epoch": 1.76, + "learning_rate": 1.7174363743642257e-06, + "loss": 0.6777, + "step": 24910 + }, + { + "epoch": 1.76, + "learning_rate": 1.7073255268847676e-06, + "loss": 0.6788, + "step": 24920 + }, + { + "epoch": 1.76, + "learning_rate": 1.6972434771963586e-06, + "loss": 0.7013, + "step": 24930 + }, + { + "epoch": 1.77, + "learning_rate": 1.6871902377638915e-06, + "loss": 0.6847, + "step": 24940 + }, + { + "epoch": 1.77, + "learning_rate": 1.6771658210166436e-06, + "loss": 0.7232, + "step": 24950 + }, + { + "epoch": 1.77, + "learning_rate": 1.6671702393482646e-06, + "loss": 0.7024, + "step": 24960 + }, + { + "epoch": 1.77, + "learning_rate": 1.657203505116739e-06, + "loss": 0.674, + "step": 24970 + }, + { + "epoch": 1.77, + "learning_rate": 1.647265630644393e-06, + "loss": 0.7264, + "step": 24980 + }, + { + "epoch": 1.77, + "learning_rate": 1.6373566282178704e-06, + "loss": 0.703, + "step": 24990 + }, + { + "epoch": 1.77, + "learning_rate": 1.6274765100881272e-06, + "loss": 0.7121, + "step": 25000 + }, + { + "epoch": 1.77, + "learning_rate": 1.6176252884703924e-06, + "loss": 0.7126, + "step": 25010 + }, + { + "epoch": 1.77, + "learning_rate": 1.6078029755441926e-06, + "loss": 0.7127, + "step": 25020 + }, + { + "epoch": 1.77, + "learning_rate": 1.5980095834532832e-06, + "loss": 0.7084, + "step": 25030 + }, + { + "epoch": 1.77, + "learning_rate": 1.5882451243056928e-06, + "loss": 0.6787, + "step": 25040 + }, + { + "epoch": 1.77, + "learning_rate": 1.578509610173659e-06, + "loss": 0.7314, + "step": 25050 + }, + { + "epoch": 1.77, + "learning_rate": 1.5688030530936425e-06, + "loss": 0.7144, + "step": 25060 + }, + { + "epoch": 1.77, + "learning_rate": 1.559125465066294e-06, + "loss": 0.7208, + "step": 25070 + }, + { + "epoch": 1.78, + "learning_rate": 1.5494768580564618e-06, + "loss": 0.7474, + "step": 25080 + }, + { + "epoch": 1.78, + "learning_rate": 1.5398572439931591e-06, + "loss": 0.7082, + "step": 25090 + }, + { + "epoch": 1.78, + "learning_rate": 1.5302666347695477e-06, + "loss": 0.7083, + "step": 25100 + }, + { + "epoch": 1.78, + "learning_rate": 1.5207050422429286e-06, + "loss": 0.7155, + "step": 25110 + }, + { + "epoch": 1.78, + "learning_rate": 1.5111724782347375e-06, + "loss": 0.6739, + "step": 25120 + }, + { + "epoch": 1.78, + "learning_rate": 1.501668954530519e-06, + "loss": 0.7285, + "step": 25130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4921944828799105e-06, + "loss": 0.6637, + "step": 25140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4827490749966227e-06, + "loss": 0.6976, + "step": 25150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4733327425584504e-06, + "loss": 0.7068, + "step": 25160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4639454972072337e-06, + "loss": 0.6992, + "step": 25170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4545873505488471e-06, + "loss": 0.694, + "step": 25180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4452583141531888e-06, + "loss": 0.7144, + "step": 25190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4359583995541741e-06, + "loss": 0.7099, + "step": 25200 + }, + { + "epoch": 1.78, + "learning_rate": 1.426687618249714e-06, + "loss": 0.6898, + "step": 25210 + }, + { + "epoch": 1.79, + "learning_rate": 1.4174459817016927e-06, + "loss": 0.6764, + "step": 25220 + }, + { + "epoch": 1.79, + "learning_rate": 1.4082335013359593e-06, + "loss": 0.6904, + "step": 25230 + }, + { + "epoch": 1.79, + "learning_rate": 1.3990501885423251e-06, + "loss": 0.7081, + "step": 25240 + }, + { + "epoch": 1.79, + "learning_rate": 1.3898960546745443e-06, + "loss": 0.6962, + "step": 25250 + }, + { + "epoch": 1.79, + "learning_rate": 1.3807711110502802e-06, + "loss": 0.7231, + "step": 25260 + }, + { + "epoch": 1.79, + "learning_rate": 1.3716753689511142e-06, + "loss": 0.7064, + "step": 25270 + }, + { + "epoch": 1.79, + "learning_rate": 1.3626088396225284e-06, + "loss": 0.6942, + "step": 25280 + }, + { + "epoch": 1.79, + "learning_rate": 1.353571534273887e-06, + "loss": 0.7291, + "step": 25290 + }, + { + "epoch": 1.79, + "learning_rate": 1.3445634640784133e-06, + "loss": 0.6808, + "step": 25300 + }, + { + "epoch": 1.79, + "learning_rate": 1.3355846401732015e-06, + "loss": 0.6962, + "step": 25310 + }, + { + "epoch": 1.79, + "learning_rate": 1.3266350736591744e-06, + "loss": 0.736, + "step": 25320 + }, + { + "epoch": 1.79, + "learning_rate": 1.3177147756010893e-06, + "loss": 0.6821, + "step": 25330 + }, + { + "epoch": 1.79, + "learning_rate": 1.3088237570275165e-06, + "loss": 0.6935, + "step": 25340 + }, + { + "epoch": 1.79, + "learning_rate": 1.2999620289308263e-06, + "loss": 0.7366, + "step": 25350 + }, + { + "epoch": 1.8, + "learning_rate": 1.2911296022671716e-06, + "loss": 0.7145, + "step": 25360 + }, + { + "epoch": 1.8, + "learning_rate": 1.2823264879564838e-06, + "loss": 0.6949, + "step": 25370 + }, + { + "epoch": 1.8, + "learning_rate": 1.2735526968824575e-06, + "loss": 0.7115, + "step": 25380 + }, + { + "epoch": 1.8, + "learning_rate": 1.264808239892526e-06, + "loss": 0.7214, + "step": 25390 + }, + { + "epoch": 1.8, + "learning_rate": 1.2560931277978526e-06, + "loss": 0.7128, + "step": 25400 + }, + { + "epoch": 1.8, + "learning_rate": 1.2474073713733353e-06, + "loss": 0.7109, + "step": 25410 + }, + { + "epoch": 1.8, + "learning_rate": 1.238750981357567e-06, + "loss": 0.7233, + "step": 25420 + }, + { + "epoch": 1.8, + "learning_rate": 1.2301239684528342e-06, + "loss": 0.7049, + "step": 25430 + }, + { + "epoch": 1.8, + "learning_rate": 1.2215263433250995e-06, + "loss": 0.7242, + "step": 25440 + }, + { + "epoch": 1.8, + "learning_rate": 1.2129581166040043e-06, + "loss": 0.6809, + "step": 25450 + }, + { + "epoch": 1.8, + "learning_rate": 1.2044192988828362e-06, + "loss": 0.7136, + "step": 25460 + }, + { + "epoch": 1.8, + "learning_rate": 1.1959099007185226e-06, + "loss": 0.6874, + "step": 25470 + }, + { + "epoch": 1.8, + "learning_rate": 1.1874299326316147e-06, + "loss": 0.7122, + "step": 25480 + }, + { + "epoch": 1.8, + "learning_rate": 1.1789794051062815e-06, + "loss": 0.7186, + "step": 25490 + }, + { + "epoch": 1.8, + "learning_rate": 1.1705583285903043e-06, + "loss": 0.7135, + "step": 25500 + }, + { + "epoch": 1.81, + "learning_rate": 1.162166713495033e-06, + "loss": 0.6536, + "step": 25510 + }, + { + "epoch": 1.81, + "learning_rate": 1.1538045701954047e-06, + "loss": 0.6984, + "step": 25520 + }, + { + "epoch": 1.81, + "learning_rate": 1.1454719090299131e-06, + "loss": 0.7201, + "step": 25530 + }, + { + "epoch": 1.81, + "learning_rate": 1.1371687403006147e-06, + "loss": 0.6937, + "step": 25540 + }, + { + "epoch": 1.81, + "learning_rate": 1.1288950742730898e-06, + "loss": 0.7263, + "step": 25550 + }, + { + "epoch": 1.81, + "learning_rate": 1.1206509211764415e-06, + "loss": 0.6799, + "step": 25560 + }, + { + "epoch": 1.81, + "learning_rate": 1.1124362912032949e-06, + "loss": 0.6887, + "step": 25570 + }, + { + "epoch": 1.81, + "learning_rate": 1.10425119450977e-06, + "loss": 0.7054, + "step": 25580 + }, + { + "epoch": 1.81, + "learning_rate": 1.0960956412154777e-06, + "loss": 0.7283, + "step": 25590 + }, + { + "epoch": 1.81, + "learning_rate": 1.087969641403494e-06, + "loss": 0.7374, + "step": 25600 + }, + { + "epoch": 1.81, + "learning_rate": 1.0798732051203603e-06, + "loss": 0.7013, + "step": 25610 + }, + { + "epoch": 1.81, + "learning_rate": 1.0718063423760722e-06, + "loss": 0.6999, + "step": 25620 + }, + { + "epoch": 1.81, + "learning_rate": 1.0637690631440572e-06, + "loss": 0.7123, + "step": 25630 + }, + { + "epoch": 1.81, + "learning_rate": 1.0557613773611697e-06, + "loss": 0.6978, + "step": 25640 + }, + { + "epoch": 1.82, + "learning_rate": 1.0477832949276706e-06, + "loss": 0.7393, + "step": 25650 + }, + { + "epoch": 1.82, + "learning_rate": 1.0398348257072282e-06, + "loss": 0.727, + "step": 25660 + }, + { + "epoch": 1.82, + "learning_rate": 1.0319159795268984e-06, + "loss": 0.7193, + "step": 25670 + }, + { + "epoch": 1.82, + "learning_rate": 1.024026766177108e-06, + "loss": 0.7097, + "step": 25680 + }, + { + "epoch": 1.82, + "learning_rate": 1.0161671954116464e-06, + "loss": 0.7109, + "step": 25690 + }, + { + "epoch": 1.82, + "learning_rate": 1.0083372769476629e-06, + "loss": 0.7086, + "step": 25700 + }, + { + "epoch": 1.82, + "learning_rate": 1.0005370204656418e-06, + "loss": 0.7081, + "step": 25710 + }, + { + "epoch": 1.82, + "learning_rate": 9.927664356093908e-07, + "loss": 0.6914, + "step": 25720 + }, + { + "epoch": 1.82, + "learning_rate": 9.850255319860362e-07, + "loss": 0.6929, + "step": 25730 + }, + { + "epoch": 1.82, + "learning_rate": 9.773143191660116e-07, + "loss": 0.7269, + "step": 25740 + }, + { + "epoch": 1.82, + "learning_rate": 9.696328066830378e-07, + "loss": 0.7243, + "step": 25750 + }, + { + "epoch": 1.82, + "learning_rate": 9.61981004034121e-07, + "loss": 0.718, + "step": 25760 + }, + { + "epoch": 1.82, + "learning_rate": 9.54358920679524e-07, + "loss": 0.7295, + "step": 25770 + }, + { + "epoch": 1.82, + "learning_rate": 9.46766566042781e-07, + "loss": 0.7101, + "step": 25780 + }, + { + "epoch": 1.83, + "learning_rate": 9.392039495106642e-07, + "loss": 0.7296, + "step": 25790 + }, + { + "epoch": 1.83, + "learning_rate": 9.31671080433183e-07, + "loss": 0.7022, + "step": 25800 + }, + { + "epoch": 1.83, + "learning_rate": 9.241679681235572e-07, + "loss": 0.7167, + "step": 25810 + }, + { + "epoch": 1.83, + "learning_rate": 9.166946218582301e-07, + "loss": 0.7109, + "step": 25820 + }, + { + "epoch": 1.83, + "learning_rate": 9.092510508768387e-07, + "loss": 0.7036, + "step": 25830 + }, + { + "epoch": 1.83, + "learning_rate": 9.018372643822132e-07, + "loss": 0.7064, + "step": 25840 + }, + { + "epoch": 1.83, + "learning_rate": 8.944532715403408e-07, + "loss": 0.7124, + "step": 25850 + }, + { + "epoch": 1.83, + "learning_rate": 8.87099081480397e-07, + "loss": 0.7441, + "step": 25860 + }, + { + "epoch": 1.83, + "learning_rate": 8.797747032947001e-07, + "loss": 0.7099, + "step": 25870 + }, + { + "epoch": 1.83, + "learning_rate": 8.724801460387094e-07, + "loss": 0.7087, + "step": 25880 + }, + { + "epoch": 1.83, + "learning_rate": 8.652154187310218e-07, + "loss": 0.7032, + "step": 25890 + }, + { + "epoch": 1.83, + "learning_rate": 8.579805303533417e-07, + "loss": 0.7031, + "step": 25900 + }, + { + "epoch": 1.83, + "learning_rate": 8.507754898504943e-07, + "loss": 0.6833, + "step": 25910 + }, + { + "epoch": 1.83, + "learning_rate": 8.436003061304043e-07, + "loss": 0.7052, + "step": 25920 + }, + { + "epoch": 1.84, + "learning_rate": 8.364549880640671e-07, + "loss": 0.7, + "step": 25930 + }, + { + "epoch": 1.84, + "learning_rate": 8.293395444855662e-07, + "loss": 0.7127, + "step": 25940 + }, + { + "epoch": 1.84, + "learning_rate": 8.222539841920507e-07, + "loss": 0.709, + "step": 25950 + }, + { + "epoch": 1.84, + "learning_rate": 8.151983159437215e-07, + "loss": 0.6866, + "step": 25960 + }, + { + "epoch": 1.84, + "learning_rate": 8.081725484638176e-07, + "loss": 0.7142, + "step": 25970 + }, + { + "epoch": 1.84, + "learning_rate": 8.011766904386154e-07, + "loss": 0.7077, + "step": 25980 + }, + { + "epoch": 1.84, + "learning_rate": 7.942107505174102e-07, + "loss": 0.7226, + "step": 25990 + }, + { + "epoch": 1.84, + "learning_rate": 7.872747373125156e-07, + "loss": 0.7148, + "step": 26000 + }, + { + "epoch": 1.84, + "learning_rate": 7.80368659399236e-07, + "loss": 0.7326, + "step": 26010 + }, + { + "epoch": 1.84, + "learning_rate": 7.734925253158665e-07, + "loss": 0.7066, + "step": 26020 + }, + { + "epoch": 1.84, + "learning_rate": 7.666463435636873e-07, + "loss": 0.6938, + "step": 26030 + }, + { + "epoch": 1.84, + "learning_rate": 7.598301226069443e-07, + "loss": 0.6948, + "step": 26040 + }, + { + "epoch": 1.84, + "learning_rate": 7.53043870872841e-07, + "loss": 0.6797, + "step": 26050 + }, + { + "epoch": 1.84, + "learning_rate": 7.462875967515242e-07, + "loss": 0.7114, + "step": 26060 + }, + { + "epoch": 1.85, + "learning_rate": 7.395613085960873e-07, + "loss": 0.7184, + "step": 26070 + }, + { + "epoch": 1.85, + "learning_rate": 7.328650147225419e-07, + "loss": 0.6855, + "step": 26080 + }, + { + "epoch": 1.85, + "learning_rate": 7.261987234098238e-07, + "loss": 0.7092, + "step": 26090 + }, + { + "epoch": 1.85, + "learning_rate": 7.19562442899771e-07, + "loss": 0.717, + "step": 26100 + }, + { + "epoch": 1.85, + "learning_rate": 7.129561813971147e-07, + "loss": 0.7146, + "step": 26110 + }, + { + "epoch": 1.85, + "learning_rate": 7.06379947069477e-07, + "loss": 0.7148, + "step": 26120 + }, + { + "epoch": 1.85, + "learning_rate": 6.998337480473599e-07, + "loss": 0.688, + "step": 26130 + }, + { + "epoch": 1.85, + "learning_rate": 6.933175924241225e-07, + "loss": 0.7048, + "step": 26140 + }, + { + "epoch": 1.85, + "learning_rate": 6.868314882559845e-07, + "loss": 0.7307, + "step": 26150 + }, + { + "epoch": 1.85, + "learning_rate": 6.803754435620091e-07, + "loss": 0.721, + "step": 26160 + }, + { + "epoch": 1.85, + "learning_rate": 6.739494663241031e-07, + "loss": 0.7126, + "step": 26170 + }, + { + "epoch": 1.85, + "learning_rate": 6.675535644869891e-07, + "loss": 0.695, + "step": 26180 + }, + { + "epoch": 1.85, + "learning_rate": 6.611877459582083e-07, + "loss": 0.6855, + "step": 26190 + }, + { + "epoch": 1.85, + "learning_rate": 6.548520186081181e-07, + "loss": 0.7089, + "step": 26200 + }, + { + "epoch": 1.86, + "learning_rate": 6.485463902698635e-07, + "loss": 0.7166, + "step": 26210 + }, + { + "epoch": 1.86, + "learning_rate": 6.42270868739378e-07, + "loss": 0.6708, + "step": 26220 + }, + { + "epoch": 1.86, + "learning_rate": 6.360254617753719e-07, + "loss": 0.7116, + "step": 26230 + }, + { + "epoch": 1.86, + "learning_rate": 6.29810177099327e-07, + "loss": 0.727, + "step": 26240 + }, + { + "epoch": 1.86, + "learning_rate": 6.236250223954854e-07, + "loss": 0.7041, + "step": 26250 + }, + { + "epoch": 1.86, + "learning_rate": 6.174700053108329e-07, + "loss": 0.7394, + "step": 26260 + }, + { + "epoch": 1.86, + "learning_rate": 6.113451334550934e-07, + "loss": 0.696, + "step": 26270 + }, + { + "epoch": 1.86, + "learning_rate": 6.052504144007265e-07, + "loss": 0.725, + "step": 26280 + }, + { + "epoch": 1.86, + "learning_rate": 5.991858556829072e-07, + "loss": 0.7124, + "step": 26290 + }, + { + "epoch": 1.86, + "learning_rate": 5.93151464799535e-07, + "loss": 0.7376, + "step": 26300 + }, + { + "epoch": 1.86, + "learning_rate": 5.871472492111868e-07, + "loss": 0.7172, + "step": 26310 + }, + { + "epoch": 1.86, + "learning_rate": 5.811732163411549e-07, + "loss": 0.7413, + "step": 26320 + }, + { + "epoch": 1.86, + "learning_rate": 5.75229373575406e-07, + "loss": 0.7297, + "step": 26330 + }, + { + "epoch": 1.86, + "learning_rate": 5.693157282625872e-07, + "loss": 0.7237, + "step": 26340 + }, + { + "epoch": 1.87, + "learning_rate": 5.634322877140025e-07, + "loss": 0.6955, + "step": 26350 + }, + { + "epoch": 1.87, + "learning_rate": 5.575790592036139e-07, + "loss": 0.7184, + "step": 26360 + }, + { + "epoch": 1.87, + "learning_rate": 5.517560499680358e-07, + "loss": 0.7002, + "step": 26370 + }, + { + "epoch": 1.87, + "learning_rate": 5.459632672065229e-07, + "loss": 0.6985, + "step": 26380 + }, + { + "epoch": 1.87, + "learning_rate": 5.402007180809493e-07, + "loss": 0.7, + "step": 26390 + }, + { + "epoch": 1.87, + "learning_rate": 5.344684097158215e-07, + "loss": 0.6975, + "step": 26400 + }, + { + "epoch": 1.87, + "learning_rate": 5.287663491982481e-07, + "loss": 0.6832, + "step": 26410 + }, + { + "epoch": 1.87, + "learning_rate": 5.230945435779483e-07, + "loss": 0.689, + "step": 26420 + }, + { + "epoch": 1.87, + "learning_rate": 5.174529998672322e-07, + "loss": 0.7153, + "step": 26430 + }, + { + "epoch": 1.87, + "learning_rate": 5.118417250409957e-07, + "loss": 0.7424, + "step": 26440 + }, + { + "epoch": 1.87, + "learning_rate": 5.062607260367142e-07, + "loss": 0.6872, + "step": 26450 + }, + { + "epoch": 1.87, + "learning_rate": 5.007100097544293e-07, + "loss": 0.7121, + "step": 26460 + }, + { + "epoch": 1.87, + "learning_rate": 4.95189583056746e-07, + "loss": 0.7051, + "step": 26470 + }, + { + "epoch": 1.87, + "learning_rate": 4.896994527688104e-07, + "loss": 0.7067, + "step": 26480 + }, + { + "epoch": 1.88, + "learning_rate": 4.842396256783288e-07, + "loss": 0.7005, + "step": 26490 + }, + { + "epoch": 1.88, + "learning_rate": 4.788101085355267e-07, + "loss": 0.706, + "step": 26500 + }, + { + "epoch": 1.88, + "learning_rate": 4.7341090805317346e-07, + "loss": 0.7267, + "step": 26510 + }, + { + "epoch": 1.88, + "learning_rate": 4.68042030906532e-07, + "loss": 0.7193, + "step": 26520 + }, + { + "epoch": 1.88, + "learning_rate": 4.6270348373339545e-07, + "loss": 0.6997, + "step": 26530 + }, + { + "epoch": 1.88, + "learning_rate": 4.573952731340536e-07, + "loss": 0.7155, + "step": 26540 + }, + { + "epoch": 1.88, + "learning_rate": 4.5211740567129014e-07, + "loss": 0.6904, + "step": 26550 + }, + { + "epoch": 1.88, + "learning_rate": 4.468698878703631e-07, + "loss": 0.7309, + "step": 26560 + }, + { + "epoch": 1.88, + "learning_rate": 4.416527262190218e-07, + "loss": 0.6915, + "step": 26570 + }, + { + "epoch": 1.88, + "learning_rate": 4.3646592716748423e-07, + "loss": 0.7185, + "step": 26580 + }, + { + "epoch": 1.88, + "learning_rate": 4.3130949712842093e-07, + "loss": 0.695, + "step": 26590 + }, + { + "epoch": 1.88, + "learning_rate": 4.2618344247696284e-07, + "loss": 0.7061, + "step": 26600 + }, + { + "epoch": 1.88, + "learning_rate": 4.210877695506793e-07, + "loss": 0.7159, + "step": 26610 + }, + { + "epoch": 1.88, + "learning_rate": 4.1602248464958914e-07, + "loss": 0.7226, + "step": 26620 + }, + { + "epoch": 1.88, + "learning_rate": 4.109875940361302e-07, + "loss": 0.6967, + "step": 26630 + }, + { + "epoch": 1.89, + "learning_rate": 4.059831039351675e-07, + "loss": 0.6913, + "step": 26640 + }, + { + "epoch": 1.89, + "learning_rate": 4.010090205339767e-07, + "loss": 0.6682, + "step": 26650 + }, + { + "epoch": 1.89, + "learning_rate": 3.960653499822442e-07, + "loss": 0.7328, + "step": 26660 + }, + { + "epoch": 1.89, + "learning_rate": 3.911520983920558e-07, + "loss": 0.7028, + "step": 26670 + }, + { + "epoch": 1.89, + "learning_rate": 3.862692718378885e-07, + "loss": 0.6989, + "step": 26680 + }, + { + "epoch": 1.89, + "learning_rate": 3.8141687635659953e-07, + "loss": 0.6951, + "step": 26690 + }, + { + "epoch": 1.89, + "learning_rate": 3.7659491794742595e-07, + "loss": 0.7256, + "step": 26700 + }, + { + "epoch": 1.89, + "learning_rate": 3.718034025719769e-07, + "loss": 0.6671, + "step": 26710 + }, + { + "epoch": 1.89, + "learning_rate": 3.670423361542191e-07, + "loss": 0.7007, + "step": 26720 + }, + { + "epoch": 1.89, + "learning_rate": 3.623117245804719e-07, + "loss": 0.6817, + "step": 26730 + }, + { + "epoch": 1.89, + "learning_rate": 3.576115736994096e-07, + "loss": 0.7201, + "step": 26740 + }, + { + "epoch": 1.89, + "learning_rate": 3.5294188932203653e-07, + "loss": 0.7237, + "step": 26750 + }, + { + "epoch": 1.89, + "learning_rate": 3.4830267722170684e-07, + "loss": 0.6799, + "step": 26760 + }, + { + "epoch": 1.89, + "learning_rate": 3.436939431340769e-07, + "loss": 0.7261, + "step": 26770 + }, + { + "epoch": 1.9, + "learning_rate": 3.3911569275713594e-07, + "loss": 0.703, + "step": 26780 + }, + { + "epoch": 1.9, + "learning_rate": 3.3456793175118685e-07, + "loss": 0.6719, + "step": 26790 + }, + { + "epoch": 1.9, + "learning_rate": 3.300506657388347e-07, + "loss": 0.6846, + "step": 26800 + }, + { + "epoch": 1.9, + "learning_rate": 3.25563900304976e-07, + "loss": 0.6572, + "step": 26810 + }, + { + "epoch": 1.9, + "learning_rate": 3.211076409968039e-07, + "loss": 0.722, + "step": 26820 + }, + { + "epoch": 1.9, + "learning_rate": 3.1668189332379194e-07, + "loss": 0.7193, + "step": 26830 + }, + { + "epoch": 1.9, + "learning_rate": 3.1228666275769925e-07, + "loss": 0.7138, + "step": 26840 + }, + { + "epoch": 1.9, + "learning_rate": 3.079219547325429e-07, + "loss": 0.7114, + "step": 26850 + }, + { + "epoch": 1.9, + "learning_rate": 3.03587774644612e-07, + "loss": 0.6791, + "step": 26860 + }, + { + "epoch": 1.9, + "learning_rate": 2.9928412785244777e-07, + "loss": 0.6882, + "step": 26870 + }, + { + "epoch": 1.9, + "learning_rate": 2.950110196768496e-07, + "loss": 0.7039, + "step": 26880 + }, + { + "epoch": 1.9, + "learning_rate": 2.9076845540084993e-07, + "loss": 0.6818, + "step": 26890 + }, + { + "epoch": 1.9, + "learning_rate": 2.8655644026972514e-07, + "loss": 0.7056, + "step": 26900 + }, + { + "epoch": 1.9, + "learning_rate": 2.823749794909819e-07, + "loss": 0.7029, + "step": 26910 + }, + { + "epoch": 1.91, + "learning_rate": 2.782240782343487e-07, + "loss": 0.7107, + "step": 26920 + }, + { + "epoch": 1.91, + "learning_rate": 2.741037416317732e-07, + "loss": 0.7263, + "step": 26930 + }, + { + "epoch": 1.91, + "learning_rate": 2.700139747774166e-07, + "loss": 0.6971, + "step": 26940 + }, + { + "epoch": 1.91, + "learning_rate": 2.6595478272763985e-07, + "loss": 0.7139, + "step": 26950 + }, + { + "epoch": 1.91, + "learning_rate": 2.6192617050100897e-07, + "loss": 0.6651, + "step": 26960 + }, + { + "epoch": 1.91, + "learning_rate": 2.579281430782815e-07, + "loss": 0.7181, + "step": 26970 + }, + { + "epoch": 1.91, + "learning_rate": 2.539607054023979e-07, + "loss": 0.6995, + "step": 26980 + }, + { + "epoch": 1.91, + "learning_rate": 2.50023862378479e-07, + "loss": 0.699, + "step": 26990 + }, + { + "epoch": 1.91, + "learning_rate": 2.4611761887382844e-07, + "loss": 0.718, + "step": 27000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 6.572189972982399e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-27000/training_args.bin b/checkpoint-27000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-27000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-28000/README.md b/checkpoint-28000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-28000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-28000/adapter_config.json b/checkpoint-28000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-28000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-28000/adapter_model.bin b/checkpoint-28000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..480070f4083372a3f102f76f8a182b4bedf212bf --- /dev/null +++ b/checkpoint-28000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9e3e7972bcf0b9f281b80b629f9746ea24969e4d1c6fa89668b799981125ad04 +size 16821197 diff --git a/checkpoint-28000/finetuning_args.json b/checkpoint-28000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-28000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-28000/optimizer.pt b/checkpoint-28000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..28d5440b594f69070014c1f281558621ae859b97 --- /dev/null +++ b/checkpoint-28000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9cea5a94984f0cd0e4368302102d16b4d918d2091ed168860b100112a09eff1 +size 33661637 diff --git a/checkpoint-28000/rng_state.pth b/checkpoint-28000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..36ac9e97aeeec57ebafd6d209d98dca113a78450 --- /dev/null +++ b/checkpoint-28000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:638d0ac67adb613f1b31c12c748b5758ca925c54399a095a6a29514eef904344 +size 18663 diff --git a/checkpoint-28000/scheduler.pt b/checkpoint-28000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cb885c518f5ebef4e992e6e97c8924db86cf580e --- /dev/null +++ b/checkpoint-28000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:150268b4a14fd8bfeecf8edb6097cd2bdb5801abae1c80f65135e236b8edf4c5 +size 627 diff --git a/checkpoint-28000/trainer_state.json b/checkpoint-28000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..47c84e618739bade6db60156beda0dfeeb190147 --- /dev/null +++ b/checkpoint-28000/trainer_state.json @@ -0,0 +1,16816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9819150253932862, + "global_step": 28000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + }, + { + "epoch": 1.49, + "learning_rate": 7.692689245053728e-06, + "loss": 0.6843, + "step": 21010 + }, + { + "epoch": 1.49, + "learning_rate": 7.672640603614179e-06, + "loss": 0.7116, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 7.652613384775791e-06, + "loss": 0.7229, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 7.632607613299142e-06, + "loss": 0.7032, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 7.612623313918263e-06, + "loss": 0.7184, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 7.592660511340641e-06, + "loss": 0.7004, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 7.572719230247205e-06, + "loss": 0.7081, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 7.552799495292273e-06, + "loss": 0.6928, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 7.532901331103512e-06, + "loss": 0.686, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 7.513024762281914e-06, + "loss": 0.7178, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 7.493169813401799e-06, + "loss": 0.6919, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 7.473336509010742e-06, + "loss": 0.7132, + "step": 21120 + }, + { + "epoch": 1.5, + "learning_rate": 7.453524873629553e-06, + "loss": 0.7174, + "step": 21130 + }, + { + "epoch": 1.5, + "learning_rate": 7.4337349317522485e-06, + "loss": 0.7243, + "step": 21140 + }, + { + "epoch": 1.5, + "learning_rate": 7.41396670784604e-06, + "loss": 0.7158, + "step": 21150 + }, + { + "epoch": 1.5, + "learning_rate": 7.394220226351286e-06, + "loss": 0.7116, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 7.374495511681454e-06, + "loss": 0.6906, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 7.354792588223094e-06, + "loss": 0.6896, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 7.3351114803358354e-06, + "loss": 0.7078, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 7.3154522123523305e-06, + "loss": 0.7297, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 7.295814808578216e-06, + "loss": 0.6861, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 7.276199293292102e-06, + "loss": 0.6985, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 7.256605690745547e-06, + "loss": 0.7065, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 7.237034025163017e-06, + "loss": 0.7173, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 7.217484320741838e-06, + "loss": 0.7191, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 7.197956601652212e-06, + "loss": 0.7349, + "step": 21260 + }, + { + "epoch": 1.51, + "learning_rate": 7.178450892037128e-06, + "loss": 0.6995, + "step": 21270 + }, + { + "epoch": 1.51, + "learning_rate": 7.158967216012396e-06, + "loss": 0.7089, + "step": 21280 + }, + { + "epoch": 1.51, + "learning_rate": 7.139505597666557e-06, + "loss": 0.6755, + "step": 21290 + }, + { + "epoch": 1.51, + "learning_rate": 7.120066061060906e-06, + "loss": 0.6743, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 7.100648630229412e-06, + "loss": 0.7079, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 7.081253329178727e-06, + "loss": 0.7348, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 7.061880181888158e-06, + "loss": 0.7047, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 7.042529212309599e-06, + "loss": 0.7129, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 7.023200444367517e-06, + "loss": 0.6997, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 7.0038939019589605e-06, + "loss": 0.731, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 6.984609608953488e-06, + "loss": 0.7097, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 6.965347589193141e-06, + "loss": 0.7155, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 6.9461078664924216e-06, + "loss": 0.7037, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 6.926890464638277e-06, + "loss": 0.7201, + "step": 21400 + }, + { + "epoch": 1.52, + "learning_rate": 6.907695407390066e-06, + "loss": 0.7316, + "step": 21410 + }, + { + "epoch": 1.52, + "learning_rate": 6.888522718479498e-06, + "loss": 0.7124, + "step": 21420 + }, + { + "epoch": 1.52, + "learning_rate": 6.869372421610632e-06, + "loss": 0.7253, + "step": 21430 + }, + { + "epoch": 1.52, + "learning_rate": 6.85024454045986e-06, + "loss": 0.7065, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 6.831139098675854e-06, + "loss": 0.7073, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 6.812056119879534e-06, + "loss": 0.686, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 6.792995627664042e-06, + "loss": 0.6915, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 6.773957645594742e-06, + "loss": 0.7059, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 6.754942197209163e-06, + "loss": 0.7029, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 6.7359493060169475e-06, + "loss": 0.7351, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 6.716978995499887e-06, + "loss": 0.7193, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 6.698031289111825e-06, + "loss": 0.6966, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 6.679106210278682e-06, + "loss": 0.7117, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 6.660203782398383e-06, + "loss": 0.7054, + "step": 21540 + }, + { + "epoch": 1.53, + "learning_rate": 6.641324028840865e-06, + "loss": 0.712, + "step": 21550 + }, + { + "epoch": 1.53, + "learning_rate": 6.622466972948016e-06, + "loss": 0.7014, + "step": 21560 + }, + { + "epoch": 1.53, + "learning_rate": 6.603632638033683e-06, + "loss": 0.7101, + "step": 21570 + }, + { + "epoch": 1.53, + "learning_rate": 6.584821047383594e-06, + "loss": 0.7027, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 6.566032224255389e-06, + "loss": 0.7388, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 6.547266191878529e-06, + "loss": 0.6844, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 6.528522973454315e-06, + "loss": 0.6999, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 6.509802592155851e-06, + "loss": 0.7233, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 6.491105071127984e-06, + "loss": 0.6955, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 6.4724304334873e-06, + "loss": 0.7329, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 6.453778702322114e-06, + "loss": 0.7384, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 6.435149900692411e-06, + "loss": 0.6645, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 6.416544051629819e-06, + "loss": 0.7142, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 6.397961178137584e-06, + "loss": 0.7009, + "step": 21680 + }, + { + "epoch": 1.54, + "learning_rate": 6.3794013031905685e-06, + "loss": 0.6876, + "step": 21690 + }, + { + "epoch": 1.54, + "learning_rate": 6.36086444973519e-06, + "loss": 0.7037, + "step": 21700 + }, + { + "epoch": 1.54, + "learning_rate": 6.342350640689393e-06, + "loss": 0.7337, + "step": 21710 + }, + { + "epoch": 1.54, + "learning_rate": 6.323859898942649e-06, + "loss": 0.7101, + "step": 21720 + }, + { + "epoch": 1.54, + "learning_rate": 6.305392247355893e-06, + "loss": 0.7238, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 6.2869477087615315e-06, + "loss": 0.7183, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 6.268526305963374e-06, + "loss": 0.6999, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 6.250128061736646e-06, + "loss": 0.697, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 6.231752998827925e-06, + "loss": 0.7193, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 6.213401139955144e-06, + "loss": 0.7374, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 6.195072507807529e-06, + "loss": 0.7121, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 6.17676712504561e-06, + "loss": 0.6946, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 6.1584850143011546e-06, + "loss": 0.7179, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 6.140226198177176e-06, + "loss": 0.6801, + "step": 21820 + }, + { + "epoch": 1.55, + "learning_rate": 6.121990699247865e-06, + "loss": 0.7136, + "step": 21830 + }, + { + "epoch": 1.55, + "learning_rate": 6.103778540058611e-06, + "loss": 0.7195, + "step": 21840 + }, + { + "epoch": 1.55, + "learning_rate": 6.085589743125919e-06, + "loss": 0.683, + "step": 21850 + }, + { + "epoch": 1.55, + "learning_rate": 6.067424330937438e-06, + "loss": 0.7171, + "step": 21860 + }, + { + "epoch": 1.55, + "learning_rate": 6.0492823259518795e-06, + "loss": 0.7437, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 6.0311637505990394e-06, + "loss": 0.6891, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 6.013068627279725e-06, + "loss": 0.7259, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.994996978365763e-06, + "loss": 0.7382, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.97694882619996e-06, + "loss": 0.7512, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.9589241930960635e-06, + "loss": 0.7028, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.940923101338733e-06, + "loss": 0.7125, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.922945573183544e-06, + "loss": 0.707, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.90499163085694e-06, + "loss": 0.706, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.887061296556179e-06, + "loss": 0.7613, + "step": 21960 + }, + { + "epoch": 1.56, + "learning_rate": 5.869154592449364e-06, + "loss": 0.751, + "step": 21970 + }, + { + "epoch": 1.56, + "learning_rate": 5.8512715406753486e-06, + "loss": 0.7164, + "step": 21980 + }, + { + "epoch": 1.56, + "learning_rate": 5.8334121633437794e-06, + "loss": 0.7117, + "step": 21990 + }, + { + "epoch": 1.56, + "learning_rate": 5.815576482534999e-06, + "loss": 0.7227, + "step": 22000 + }, + { + "epoch": 1.56, + "learning_rate": 5.797764520300083e-06, + "loss": 0.687, + "step": 22010 + }, + { + "epoch": 1.56, + "learning_rate": 5.7799762986607585e-06, + "loss": 0.6959, + "step": 22020 + }, + { + "epoch": 1.56, + "learning_rate": 5.762211839609424e-06, + "loss": 0.6949, + "step": 22030 + }, + { + "epoch": 1.56, + "learning_rate": 5.744471165109069e-06, + "loss": 0.7237, + "step": 22040 + }, + { + "epoch": 1.56, + "learning_rate": 5.726754297093315e-06, + "loss": 0.718, + "step": 22050 + }, + { + "epoch": 1.56, + "learning_rate": 5.709061257466314e-06, + "loss": 0.7166, + "step": 22060 + }, + { + "epoch": 1.56, + "learning_rate": 5.691392068102786e-06, + "loss": 0.6881, + "step": 22070 + }, + { + "epoch": 1.56, + "learning_rate": 5.673746750847938e-06, + "loss": 0.7015, + "step": 22080 + }, + { + "epoch": 1.56, + "learning_rate": 5.656125327517495e-06, + "loss": 0.7148, + "step": 22090 + }, + { + "epoch": 1.56, + "learning_rate": 5.638527819897607e-06, + "loss": 0.7374, + "step": 22100 + }, + { + "epoch": 1.57, + "learning_rate": 5.620954249744884e-06, + "loss": 0.6898, + "step": 22110 + }, + { + "epoch": 1.57, + "learning_rate": 5.6034046387863165e-06, + "loss": 0.7184, + "step": 22120 + }, + { + "epoch": 1.57, + "learning_rate": 5.585879008719297e-06, + "loss": 0.7096, + "step": 22130 + }, + { + "epoch": 1.57, + "learning_rate": 5.568377381211548e-06, + "loss": 0.6917, + "step": 22140 + }, + { + "epoch": 1.57, + "learning_rate": 5.550899777901136e-06, + "loss": 0.7112, + "step": 22150 + }, + { + "epoch": 1.57, + "learning_rate": 5.533446220396404e-06, + "loss": 0.7252, + "step": 22160 + }, + { + "epoch": 1.57, + "learning_rate": 5.5160167302759884e-06, + "loss": 0.664, + "step": 22170 + }, + { + "epoch": 1.57, + "learning_rate": 5.498611329088751e-06, + "loss": 0.7099, + "step": 22180 + }, + { + "epoch": 1.57, + "learning_rate": 5.481230038353782e-06, + "loss": 0.7, + "step": 22190 + }, + { + "epoch": 1.57, + "learning_rate": 5.463872879560366e-06, + "loss": 0.7235, + "step": 22200 + }, + { + "epoch": 1.57, + "learning_rate": 5.4465398741679386e-06, + "loss": 0.6844, + "step": 22210 + }, + { + "epoch": 1.57, + "learning_rate": 5.42923104360609e-06, + "loss": 0.7504, + "step": 22220 + }, + { + "epoch": 1.57, + "learning_rate": 5.411946409274501e-06, + "loss": 0.6676, + "step": 22230 + }, + { + "epoch": 1.57, + "learning_rate": 5.394685992542964e-06, + "loss": 0.7014, + "step": 22240 + }, + { + "epoch": 1.57, + "learning_rate": 5.377449814751304e-06, + "loss": 0.7109, + "step": 22250 + }, + { + "epoch": 1.58, + "learning_rate": 5.3602378972094e-06, + "loss": 0.7328, + "step": 22260 + }, + { + "epoch": 1.58, + "learning_rate": 5.343050261197116e-06, + "loss": 0.6915, + "step": 22270 + }, + { + "epoch": 1.58, + "learning_rate": 5.325886927964319e-06, + "loss": 0.6845, + "step": 22280 + }, + { + "epoch": 1.58, + "learning_rate": 5.308747918730806e-06, + "loss": 0.7038, + "step": 22290 + }, + { + "epoch": 1.58, + "learning_rate": 5.29163325468632e-06, + "loss": 0.6908, + "step": 22300 + }, + { + "epoch": 1.58, + "learning_rate": 5.274542956990491e-06, + "loss": 0.7001, + "step": 22310 + }, + { + "epoch": 1.58, + "learning_rate": 5.257477046772844e-06, + "loss": 0.7159, + "step": 22320 + }, + { + "epoch": 1.58, + "learning_rate": 5.240435545132716e-06, + "loss": 0.705, + "step": 22330 + }, + { + "epoch": 1.58, + "learning_rate": 5.22341847313931e-06, + "loss": 0.6825, + "step": 22340 + }, + { + "epoch": 1.58, + "learning_rate": 5.206425851831592e-06, + "loss": 0.7245, + "step": 22350 + }, + { + "epoch": 1.58, + "learning_rate": 5.18945770221832e-06, + "loss": 0.7323, + "step": 22360 + }, + { + "epoch": 1.58, + "learning_rate": 5.172514045277979e-06, + "loss": 0.7015, + "step": 22370 + }, + { + "epoch": 1.58, + "learning_rate": 5.155594901958791e-06, + "loss": 0.7121, + "step": 22380 + }, + { + "epoch": 1.58, + "learning_rate": 5.13870029317865e-06, + "loss": 0.7172, + "step": 22390 + }, + { + "epoch": 1.59, + "learning_rate": 5.12183023982514e-06, + "loss": 0.7217, + "step": 22400 + }, + { + "epoch": 1.59, + "learning_rate": 5.1049847627554634e-06, + "loss": 0.6898, + "step": 22410 + }, + { + "epoch": 1.59, + "learning_rate": 5.088163882796448e-06, + "loss": 0.699, + "step": 22420 + }, + { + "epoch": 1.59, + "learning_rate": 5.071367620744527e-06, + "loss": 0.7336, + "step": 22430 + }, + { + "epoch": 1.59, + "learning_rate": 5.054595997365671e-06, + "loss": 0.7309, + "step": 22440 + }, + { + "epoch": 1.59, + "learning_rate": 5.037849033395392e-06, + "loss": 0.6978, + "step": 22450 + }, + { + "epoch": 1.59, + "learning_rate": 5.0211267495387295e-06, + "loss": 0.7039, + "step": 22460 + }, + { + "epoch": 1.59, + "learning_rate": 5.004429166470209e-06, + "loss": 0.7153, + "step": 22470 + }, + { + "epoch": 1.59, + "learning_rate": 4.987756304833796e-06, + "loss": 0.6851, + "step": 22480 + }, + { + "epoch": 1.59, + "learning_rate": 4.972771883223115e-06, + "loss": 0.7255, + "step": 22490 + }, + { + "epoch": 1.59, + "learning_rate": 4.956146049072402e-06, + "loss": 0.7188, + "step": 22500 + }, + { + "epoch": 1.59, + "learning_rate": 4.939544996048415e-06, + "loss": 0.7236, + "step": 22510 + }, + { + "epoch": 1.59, + "learning_rate": 4.922968744675788e-06, + "loss": 0.7312, + "step": 22520 + }, + { + "epoch": 1.59, + "learning_rate": 4.9064173154485086e-06, + "loss": 0.7279, + "step": 22530 + }, + { + "epoch": 1.6, + "learning_rate": 4.889890728829832e-06, + "loss": 0.6995, + "step": 22540 + }, + { + "epoch": 1.6, + "learning_rate": 4.8733890052523434e-06, + "loss": 0.7013, + "step": 22550 + }, + { + "epoch": 1.6, + "learning_rate": 4.856912165117871e-06, + "loss": 0.6899, + "step": 22560 + }, + { + "epoch": 1.6, + "learning_rate": 4.840460228797489e-06, + "loss": 0.698, + "step": 22570 + }, + { + "epoch": 1.6, + "learning_rate": 4.824033216631463e-06, + "loss": 0.7089, + "step": 22580 + }, + { + "epoch": 1.6, + "learning_rate": 4.807631148929248e-06, + "loss": 0.718, + "step": 22590 + }, + { + "epoch": 1.6, + "learning_rate": 4.791254045969476e-06, + "loss": 0.7047, + "step": 22600 + }, + { + "epoch": 1.6, + "learning_rate": 4.774901927999906e-06, + "loss": 0.7076, + "step": 22610 + }, + { + "epoch": 1.6, + "learning_rate": 4.758574815237396e-06, + "loss": 0.7187, + "step": 22620 + }, + { + "epoch": 1.6, + "learning_rate": 4.742272727867894e-06, + "loss": 0.7161, + "step": 22630 + }, + { + "epoch": 1.6, + "learning_rate": 4.7259956860464165e-06, + "loss": 0.7227, + "step": 22640 + }, + { + "epoch": 1.6, + "learning_rate": 4.711367778983819e-06, + "loss": 0.7202, + "step": 22650 + }, + { + "epoch": 1.6, + "learning_rate": 4.695138379119721e-06, + "loss": 0.7038, + "step": 22660 + }, + { + "epoch": 1.6, + "learning_rate": 4.678934083077979e-06, + "loss": 0.7102, + "step": 22670 + }, + { + "epoch": 1.61, + "learning_rate": 4.662754910892711e-06, + "loss": 0.6974, + "step": 22680 + }, + { + "epoch": 1.61, + "learning_rate": 4.646600882566954e-06, + "loss": 0.6962, + "step": 22690 + }, + { + "epoch": 1.61, + "learning_rate": 4.630472018072659e-06, + "loss": 0.6789, + "step": 22700 + }, + { + "epoch": 1.61, + "learning_rate": 4.614368337350686e-06, + "loss": 0.7192, + "step": 22710 + }, + { + "epoch": 1.61, + "learning_rate": 4.598289860310745e-06, + "loss": 0.6817, + "step": 22720 + }, + { + "epoch": 1.61, + "learning_rate": 4.582236606831378e-06, + "loss": 0.7246, + "step": 22730 + }, + { + "epoch": 1.61, + "learning_rate": 4.566208596759963e-06, + "loss": 0.7084, + "step": 22740 + }, + { + "epoch": 1.61, + "learning_rate": 4.550205849912648e-06, + "loss": 0.691, + "step": 22750 + }, + { + "epoch": 1.61, + "learning_rate": 4.534228386074363e-06, + "loss": 0.7319, + "step": 22760 + }, + { + "epoch": 1.61, + "learning_rate": 4.51827622499876e-06, + "loss": 0.7048, + "step": 22770 + }, + { + "epoch": 1.61, + "learning_rate": 4.502349386408236e-06, + "loss": 0.7237, + "step": 22780 + }, + { + "epoch": 1.61, + "learning_rate": 4.48644788999385e-06, + "loss": 0.6948, + "step": 22790 + }, + { + "epoch": 1.61, + "learning_rate": 4.470571755415354e-06, + "loss": 0.7186, + "step": 22800 + }, + { + "epoch": 1.61, + "learning_rate": 4.454721002301127e-06, + "loss": 0.7407, + "step": 22810 + }, + { + "epoch": 1.62, + "learning_rate": 4.438895650248184e-06, + "loss": 0.7064, + "step": 22820 + }, + { + "epoch": 1.62, + "learning_rate": 4.423095718822112e-06, + "loss": 0.6924, + "step": 22830 + }, + { + "epoch": 1.62, + "learning_rate": 4.4073212275570954e-06, + "loss": 0.7243, + "step": 22840 + }, + { + "epoch": 1.62, + "learning_rate": 4.3915721959558534e-06, + "loss": 0.7193, + "step": 22850 + }, + { + "epoch": 1.62, + "learning_rate": 4.37584864348963e-06, + "loss": 0.7117, + "step": 22860 + }, + { + "epoch": 1.62, + "learning_rate": 4.360150589598156e-06, + "loss": 0.692, + "step": 22870 + }, + { + "epoch": 1.62, + "learning_rate": 4.344478053689652e-06, + "loss": 0.7245, + "step": 22880 + }, + { + "epoch": 1.62, + "learning_rate": 4.328831055140798e-06, + "loss": 0.7022, + "step": 22890 + }, + { + "epoch": 1.62, + "learning_rate": 4.313209613296679e-06, + "loss": 0.7265, + "step": 22900 + }, + { + "epoch": 1.62, + "learning_rate": 4.297613747470789e-06, + "loss": 0.7039, + "step": 22910 + }, + { + "epoch": 1.62, + "learning_rate": 4.282043476945008e-06, + "loss": 0.6811, + "step": 22920 + }, + { + "epoch": 1.62, + "learning_rate": 4.2664988209695775e-06, + "loss": 0.6649, + "step": 22930 + }, + { + "epoch": 1.62, + "learning_rate": 4.250979798763052e-06, + "loss": 0.6998, + "step": 22940 + }, + { + "epoch": 1.62, + "learning_rate": 4.2354864295123e-06, + "loss": 0.7433, + "step": 22950 + }, + { + "epoch": 1.63, + "learning_rate": 4.220018732372485e-06, + "loss": 0.7184, + "step": 22960 + }, + { + "epoch": 1.63, + "learning_rate": 4.204576726467027e-06, + "loss": 0.7101, + "step": 22970 + }, + { + "epoch": 1.63, + "learning_rate": 4.1891604308875706e-06, + "loss": 0.7722, + "step": 22980 + }, + { + "epoch": 1.63, + "learning_rate": 4.17376986469398e-06, + "loss": 0.7269, + "step": 22990 + }, + { + "epoch": 1.63, + "learning_rate": 4.158405046914315e-06, + "loss": 0.6903, + "step": 23000 + }, + { + "epoch": 1.63, + "learning_rate": 4.143065996544804e-06, + "loss": 0.7359, + "step": 23010 + }, + { + "epoch": 1.63, + "learning_rate": 4.1277527325498e-06, + "loss": 0.6894, + "step": 23020 + }, + { + "epoch": 1.63, + "learning_rate": 4.112465273861799e-06, + "loss": 0.7237, + "step": 23030 + }, + { + "epoch": 1.63, + "learning_rate": 4.097203639381364e-06, + "loss": 0.7028, + "step": 23040 + }, + { + "epoch": 1.63, + "learning_rate": 4.081967847977164e-06, + "loss": 0.7038, + "step": 23050 + }, + { + "epoch": 1.63, + "learning_rate": 4.066757918485886e-06, + "loss": 0.711, + "step": 23060 + }, + { + "epoch": 1.63, + "learning_rate": 4.0515738697122694e-06, + "loss": 0.717, + "step": 23070 + }, + { + "epoch": 1.63, + "learning_rate": 4.036415720429027e-06, + "loss": 0.7134, + "step": 23080 + }, + { + "epoch": 1.63, + "learning_rate": 4.021283489376876e-06, + "loss": 0.709, + "step": 23090 + }, + { + "epoch": 1.64, + "learning_rate": 4.006177195264488e-06, + "loss": 0.7266, + "step": 23100 + }, + { + "epoch": 1.64, + "learning_rate": 3.9910968567684506e-06, + "loss": 0.6872, + "step": 23110 + }, + { + "epoch": 1.64, + "learning_rate": 3.976042492533269e-06, + "loss": 0.7256, + "step": 23120 + }, + { + "epoch": 1.64, + "learning_rate": 3.961014121171342e-06, + "loss": 0.7437, + "step": 23130 + }, + { + "epoch": 1.64, + "learning_rate": 3.946011761262932e-06, + "loss": 0.7111, + "step": 23140 + }, + { + "epoch": 1.64, + "learning_rate": 3.931035431356134e-06, + "loss": 0.697, + "step": 23150 + }, + { + "epoch": 1.64, + "learning_rate": 3.916085149966856e-06, + "loss": 0.7258, + "step": 23160 + }, + { + "epoch": 1.64, + "learning_rate": 3.901160935578815e-06, + "loss": 0.7029, + "step": 23170 + }, + { + "epoch": 1.64, + "learning_rate": 3.8862628066435065e-06, + "loss": 0.686, + "step": 23180 + }, + { + "epoch": 1.64, + "learning_rate": 3.8713907815801534e-06, + "loss": 0.6994, + "step": 23190 + }, + { + "epoch": 1.64, + "learning_rate": 3.856544878775708e-06, + "loss": 0.7039, + "step": 23200 + }, + { + "epoch": 1.64, + "learning_rate": 3.841725116584846e-06, + "loss": 0.7096, + "step": 23210 + }, + { + "epoch": 1.64, + "learning_rate": 3.8269315133299126e-06, + "loss": 0.7029, + "step": 23220 + }, + { + "epoch": 1.64, + "learning_rate": 3.8121640873009067e-06, + "loss": 0.7133, + "step": 23230 + }, + { + "epoch": 1.64, + "learning_rate": 3.7974228567554617e-06, + "loss": 0.7054, + "step": 23240 + }, + { + "epoch": 1.65, + "learning_rate": 3.7827078399188393e-06, + "loss": 0.7077, + "step": 23250 + }, + { + "epoch": 1.65, + "learning_rate": 3.7680190549838847e-06, + "loss": 0.6985, + "step": 23260 + }, + { + "epoch": 1.65, + "learning_rate": 3.753356520111004e-06, + "loss": 0.7222, + "step": 23270 + }, + { + "epoch": 1.65, + "learning_rate": 3.738720253428152e-06, + "loss": 0.7174, + "step": 23280 + }, + { + "epoch": 1.65, + "learning_rate": 3.724110273030812e-06, + "loss": 0.6935, + "step": 23290 + }, + { + "epoch": 1.65, + "learning_rate": 3.709526596981977e-06, + "loss": 0.7094, + "step": 23300 + }, + { + "epoch": 1.65, + "learning_rate": 3.6949692433120907e-06, + "loss": 0.7029, + "step": 23310 + }, + { + "epoch": 1.65, + "learning_rate": 3.6804382300190844e-06, + "loss": 0.7145, + "step": 23320 + }, + { + "epoch": 1.65, + "learning_rate": 3.665933575068298e-06, + "loss": 0.7247, + "step": 23330 + }, + { + "epoch": 1.65, + "learning_rate": 3.6514552963925004e-06, + "loss": 0.7393, + "step": 23340 + }, + { + "epoch": 1.65, + "learning_rate": 3.637003411891854e-06, + "loss": 0.7352, + "step": 23350 + }, + { + "epoch": 1.65, + "learning_rate": 3.622577939433866e-06, + "loss": 0.6873, + "step": 23360 + }, + { + "epoch": 1.65, + "learning_rate": 3.6081788968534066e-06, + "loss": 0.7056, + "step": 23370 + }, + { + "epoch": 1.65, + "learning_rate": 3.5938063019526653e-06, + "loss": 0.7287, + "step": 23380 + }, + { + "epoch": 1.66, + "learning_rate": 3.579460172501142e-06, + "loss": 0.717, + "step": 23390 + }, + { + "epoch": 1.66, + "learning_rate": 3.5651405262356024e-06, + "loss": 0.7258, + "step": 23400 + }, + { + "epoch": 1.66, + "learning_rate": 3.5508473808600674e-06, + "loss": 0.6985, + "step": 23410 + }, + { + "epoch": 1.66, + "learning_rate": 3.5365807540458097e-06, + "loss": 0.7059, + "step": 23420 + }, + { + "epoch": 1.66, + "learning_rate": 3.522340663431314e-06, + "loss": 0.7047, + "step": 23430 + }, + { + "epoch": 1.66, + "learning_rate": 3.5081271266222434e-06, + "loss": 0.7064, + "step": 23440 + }, + { + "epoch": 1.66, + "learning_rate": 3.4939401611914337e-06, + "loss": 0.6804, + "step": 23450 + }, + { + "epoch": 1.66, + "learning_rate": 3.479779784678877e-06, + "loss": 0.7099, + "step": 23460 + }, + { + "epoch": 1.66, + "learning_rate": 3.465646014591703e-06, + "loss": 0.7182, + "step": 23470 + }, + { + "epoch": 1.66, + "learning_rate": 3.4515388684041193e-06, + "loss": 0.6964, + "step": 23480 + }, + { + "epoch": 1.66, + "learning_rate": 3.437458363557433e-06, + "loss": 0.707, + "step": 23490 + }, + { + "epoch": 1.66, + "learning_rate": 3.4234045174600103e-06, + "loss": 0.6729, + "step": 23500 + }, + { + "epoch": 1.66, + "learning_rate": 3.409377347487272e-06, + "loss": 0.6822, + "step": 23510 + }, + { + "epoch": 1.66, + "learning_rate": 3.395376870981634e-06, + "loss": 0.7339, + "step": 23520 + }, + { + "epoch": 1.67, + "learning_rate": 3.3814031052525175e-06, + "loss": 0.7011, + "step": 23530 + }, + { + "epoch": 1.67, + "learning_rate": 3.367456067576327e-06, + "loss": 0.7216, + "step": 23540 + }, + { + "epoch": 1.67, + "learning_rate": 3.353535775196423e-06, + "loss": 0.7194, + "step": 23550 + }, + { + "epoch": 1.67, + "learning_rate": 3.339642245323102e-06, + "loss": 0.7163, + "step": 23560 + }, + { + "epoch": 1.67, + "learning_rate": 3.325775495133546e-06, + "loss": 0.736, + "step": 23570 + }, + { + "epoch": 1.67, + "learning_rate": 3.31193554177186e-06, + "loss": 0.7071, + "step": 23580 + }, + { + "epoch": 1.67, + "learning_rate": 3.298122402349002e-06, + "loss": 0.6889, + "step": 23590 + }, + { + "epoch": 1.67, + "learning_rate": 3.2843360939427943e-06, + "loss": 0.6933, + "step": 23600 + }, + { + "epoch": 1.67, + "learning_rate": 3.270576633597866e-06, + "loss": 0.699, + "step": 23610 + }, + { + "epoch": 1.67, + "learning_rate": 3.2568440383256598e-06, + "loss": 0.6603, + "step": 23620 + }, + { + "epoch": 1.67, + "learning_rate": 3.243138325104411e-06, + "loss": 0.6971, + "step": 23630 + }, + { + "epoch": 1.67, + "learning_rate": 3.230826181364585e-06, + "loss": 0.7269, + "step": 23640 + }, + { + "epoch": 1.67, + "learning_rate": 3.217171590696141e-06, + "loss": 0.7277, + "step": 23650 + }, + { + "epoch": 1.67, + "learning_rate": 3.2035439311275006e-06, + "loss": 0.6943, + "step": 23660 + }, + { + "epoch": 1.68, + "learning_rate": 3.1899432195071575e-06, + "loss": 0.7204, + "step": 23670 + }, + { + "epoch": 1.68, + "learning_rate": 3.176369472650292e-06, + "loss": 0.7241, + "step": 23680 + }, + { + "epoch": 1.68, + "learning_rate": 3.1628227073387474e-06, + "loss": 0.6712, + "step": 23690 + }, + { + "epoch": 1.68, + "learning_rate": 3.1493029403209973e-06, + "loss": 0.6877, + "step": 23700 + }, + { + "epoch": 1.68, + "learning_rate": 3.1358101883121547e-06, + "loss": 0.6953, + "step": 23710 + }, + { + "epoch": 1.68, + "learning_rate": 3.122344467993907e-06, + "loss": 0.6918, + "step": 23720 + }, + { + "epoch": 1.68, + "learning_rate": 3.1089057960145498e-06, + "loss": 0.6866, + "step": 23730 + }, + { + "epoch": 1.68, + "learning_rate": 3.0954941889889096e-06, + "loss": 0.6975, + "step": 23740 + }, + { + "epoch": 1.68, + "learning_rate": 3.082109663498378e-06, + "loss": 0.7213, + "step": 23750 + }, + { + "epoch": 1.68, + "learning_rate": 3.068752236090841e-06, + "loss": 0.7225, + "step": 23760 + }, + { + "epoch": 1.68, + "learning_rate": 3.055421923280702e-06, + "loss": 0.7064, + "step": 23770 + }, + { + "epoch": 1.68, + "learning_rate": 3.0421187415488246e-06, + "loss": 0.696, + "step": 23780 + }, + { + "epoch": 1.68, + "learning_rate": 3.028842707342541e-06, + "loss": 0.7251, + "step": 23790 + }, + { + "epoch": 1.68, + "learning_rate": 3.0155938370756116e-06, + "loss": 0.7075, + "step": 23800 + }, + { + "epoch": 1.69, + "learning_rate": 3.0023721471282214e-06, + "loss": 0.7181, + "step": 23810 + }, + { + "epoch": 1.69, + "learning_rate": 2.9891776538469362e-06, + "loss": 0.6982, + "step": 23820 + }, + { + "epoch": 1.69, + "learning_rate": 2.9760103735447186e-06, + "loss": 0.6984, + "step": 23830 + }, + { + "epoch": 1.69, + "learning_rate": 2.962870322500866e-06, + "loss": 0.7363, + "step": 23840 + }, + { + "epoch": 1.69, + "learning_rate": 2.9497575169610243e-06, + "loss": 0.6743, + "step": 23850 + }, + { + "epoch": 1.69, + "learning_rate": 2.9366719731371563e-06, + "loss": 0.7141, + "step": 23860 + }, + { + "epoch": 1.69, + "learning_rate": 2.9236137072075067e-06, + "loss": 0.7228, + "step": 23870 + }, + { + "epoch": 1.69, + "learning_rate": 2.910582735316597e-06, + "loss": 0.7028, + "step": 23880 + }, + { + "epoch": 1.69, + "learning_rate": 2.8975790735752186e-06, + "loss": 0.7098, + "step": 23890 + }, + { + "epoch": 1.69, + "learning_rate": 2.8846027380603908e-06, + "loss": 0.6907, + "step": 23900 + }, + { + "epoch": 1.69, + "learning_rate": 2.8716537448153364e-06, + "loss": 0.7226, + "step": 23910 + }, + { + "epoch": 1.69, + "learning_rate": 2.8587321098494963e-06, + "loss": 0.7298, + "step": 23920 + }, + { + "epoch": 1.69, + "learning_rate": 2.8458378491384606e-06, + "loss": 0.7172, + "step": 23930 + }, + { + "epoch": 1.69, + "learning_rate": 2.832970978624e-06, + "loss": 0.7065, + "step": 23940 + }, + { + "epoch": 1.7, + "learning_rate": 2.8201315142140055e-06, + "loss": 0.6787, + "step": 23950 + }, + { + "epoch": 1.7, + "learning_rate": 2.8073194717824935e-06, + "loss": 0.6846, + "step": 23960 + }, + { + "epoch": 1.7, + "learning_rate": 2.794534867169568e-06, + "loss": 0.7014, + "step": 23970 + }, + { + "epoch": 1.7, + "learning_rate": 2.7817777161814245e-06, + "loss": 0.721, + "step": 23980 + }, + { + "epoch": 1.7, + "learning_rate": 2.769048034590299e-06, + "loss": 0.7459, + "step": 23990 + }, + { + "epoch": 1.7, + "learning_rate": 2.7563458381344858e-06, + "loss": 0.6801, + "step": 24000 + }, + { + "epoch": 1.7, + "learning_rate": 2.7436711425182726e-06, + "loss": 0.7142, + "step": 24010 + }, + { + "epoch": 1.7, + "learning_rate": 2.7310239634119744e-06, + "loss": 0.6988, + "step": 24020 + }, + { + "epoch": 1.7, + "learning_rate": 2.718404316451864e-06, + "loss": 0.7297, + "step": 24030 + }, + { + "epoch": 1.7, + "learning_rate": 2.7058122172401916e-06, + "loss": 0.672, + "step": 24040 + }, + { + "epoch": 1.7, + "learning_rate": 2.693247681345132e-06, + "loss": 0.7278, + "step": 24050 + }, + { + "epoch": 1.7, + "learning_rate": 2.680710724300803e-06, + "loss": 0.7055, + "step": 24060 + }, + { + "epoch": 1.7, + "learning_rate": 2.6682013616072005e-06, + "loss": 0.7266, + "step": 24070 + }, + { + "epoch": 1.7, + "learning_rate": 2.655719608730231e-06, + "loss": 0.7121, + "step": 24080 + }, + { + "epoch": 1.71, + "learning_rate": 2.6432654811016395e-06, + "loss": 0.6943, + "step": 24090 + }, + { + "epoch": 1.71, + "learning_rate": 2.630838994119042e-06, + "loss": 0.709, + "step": 24100 + }, + { + "epoch": 1.71, + "learning_rate": 2.618440163145855e-06, + "loss": 0.7115, + "step": 24110 + }, + { + "epoch": 1.71, + "learning_rate": 2.60606900351133e-06, + "loss": 0.7056, + "step": 24120 + }, + { + "epoch": 1.71, + "learning_rate": 2.5937255305104825e-06, + "loss": 0.7071, + "step": 24130 + }, + { + "epoch": 1.71, + "learning_rate": 2.581409759404113e-06, + "loss": 0.6953, + "step": 24140 + }, + { + "epoch": 1.71, + "learning_rate": 2.5691217054187726e-06, + "loss": 0.7178, + "step": 24150 + }, + { + "epoch": 1.71, + "learning_rate": 2.556861383746731e-06, + "loss": 0.7034, + "step": 24160 + }, + { + "epoch": 1.71, + "learning_rate": 2.54462880954599e-06, + "loss": 0.7028, + "step": 24170 + }, + { + "epoch": 1.71, + "learning_rate": 2.532423997940231e-06, + "loss": 0.6898, + "step": 24180 + }, + { + "epoch": 1.71, + "learning_rate": 2.5202469640188187e-06, + "loss": 0.7106, + "step": 24190 + }, + { + "epoch": 1.71, + "learning_rate": 2.508097722836769e-06, + "loss": 0.7149, + "step": 24200 + }, + { + "epoch": 1.71, + "learning_rate": 2.4959762894147503e-06, + "loss": 0.7059, + "step": 24210 + }, + { + "epoch": 1.71, + "learning_rate": 2.48388267873903e-06, + "loss": 0.6858, + "step": 24220 + }, + { + "epoch": 1.72, + "learning_rate": 2.4718169057614953e-06, + "loss": 0.6949, + "step": 24230 + }, + { + "epoch": 1.72, + "learning_rate": 2.4597789853996022e-06, + "loss": 0.7374, + "step": 24240 + }, + { + "epoch": 1.72, + "learning_rate": 2.4477689325363875e-06, + "loss": 0.6925, + "step": 24250 + }, + { + "epoch": 1.72, + "learning_rate": 2.4357867620204174e-06, + "loss": 0.7254, + "step": 24260 + }, + { + "epoch": 1.72, + "learning_rate": 2.4238324886657976e-06, + "loss": 0.7108, + "step": 24270 + }, + { + "epoch": 1.72, + "learning_rate": 2.411906127252134e-06, + "loss": 0.7205, + "step": 24280 + }, + { + "epoch": 1.72, + "learning_rate": 2.4000076925245364e-06, + "loss": 0.7125, + "step": 24290 + }, + { + "epoch": 1.72, + "learning_rate": 2.388137199193571e-06, + "loss": 0.7166, + "step": 24300 + }, + { + "epoch": 1.72, + "learning_rate": 2.3762946619352773e-06, + "loss": 0.7536, + "step": 24310 + }, + { + "epoch": 1.72, + "learning_rate": 2.364480095391114e-06, + "loss": 0.6741, + "step": 24320 + }, + { + "epoch": 1.72, + "learning_rate": 2.3526935141679695e-06, + "loss": 0.7399, + "step": 24330 + }, + { + "epoch": 1.72, + "learning_rate": 2.3409349328381264e-06, + "loss": 0.6885, + "step": 24340 + }, + { + "epoch": 1.72, + "learning_rate": 2.329204365939261e-06, + "loss": 0.6654, + "step": 24350 + }, + { + "epoch": 1.72, + "learning_rate": 2.3175018279743943e-06, + "loss": 0.7019, + "step": 24360 + }, + { + "epoch": 1.72, + "learning_rate": 2.305827333411914e-06, + "loss": 0.6802, + "step": 24370 + }, + { + "epoch": 1.73, + "learning_rate": 2.2941808966855217e-06, + "loss": 0.6845, + "step": 24380 + }, + { + "epoch": 1.73, + "learning_rate": 2.2825625321942433e-06, + "loss": 0.7087, + "step": 24390 + }, + { + "epoch": 1.73, + "learning_rate": 2.270972254302381e-06, + "loss": 0.6803, + "step": 24400 + }, + { + "epoch": 1.73, + "learning_rate": 2.259410077339527e-06, + "loss": 0.7212, + "step": 24410 + }, + { + "epoch": 1.73, + "learning_rate": 2.2478760156005278e-06, + "loss": 0.7025, + "step": 24420 + }, + { + "epoch": 1.73, + "learning_rate": 2.2363700833454627e-06, + "loss": 0.6774, + "step": 24430 + }, + { + "epoch": 1.73, + "learning_rate": 2.2248922947996464e-06, + "loss": 0.7134, + "step": 24440 + }, + { + "epoch": 1.73, + "learning_rate": 2.2134426641535756e-06, + "loss": 0.71, + "step": 24450 + }, + { + "epoch": 1.73, + "learning_rate": 2.202021205562965e-06, + "loss": 0.7253, + "step": 24460 + }, + { + "epoch": 1.73, + "learning_rate": 2.19062793314867e-06, + "loss": 0.7008, + "step": 24470 + }, + { + "epoch": 1.73, + "learning_rate": 2.179262860996717e-06, + "loss": 0.7299, + "step": 24480 + }, + { + "epoch": 1.73, + "learning_rate": 2.1679260031582575e-06, + "loss": 0.7259, + "step": 24490 + }, + { + "epoch": 1.73, + "learning_rate": 2.1566173736495663e-06, + "loss": 0.6953, + "step": 24500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1453369864520125e-06, + "loss": 0.7024, + "step": 24510 + }, + { + "epoch": 1.74, + "learning_rate": 2.134084855512056e-06, + "loss": 0.7059, + "step": 24520 + }, + { + "epoch": 1.74, + "learning_rate": 2.1228609947412074e-06, + "loss": 0.6975, + "step": 24530 + }, + { + "epoch": 1.74, + "learning_rate": 2.111665418016051e-06, + "loss": 0.7137, + "step": 24540 + }, + { + "epoch": 1.74, + "learning_rate": 2.1004981391781697e-06, + "loss": 0.7219, + "step": 24550 + }, + { + "epoch": 1.74, + "learning_rate": 2.0893591720341888e-06, + "loss": 0.7267, + "step": 24560 + }, + { + "epoch": 1.74, + "learning_rate": 2.0782485303557144e-06, + "loss": 0.7114, + "step": 24570 + }, + { + "epoch": 1.74, + "learning_rate": 2.0671662278793387e-06, + "loss": 0.7075, + "step": 24580 + }, + { + "epoch": 1.74, + "learning_rate": 2.0561122783066128e-06, + "loss": 0.6794, + "step": 24590 + }, + { + "epoch": 1.74, + "learning_rate": 2.0450866953040436e-06, + "loss": 0.7195, + "step": 24600 + }, + { + "epoch": 1.74, + "learning_rate": 2.034089492503052e-06, + "loss": 0.7097, + "step": 24610 + }, + { + "epoch": 1.74, + "learning_rate": 2.0231206834999793e-06, + "loss": 0.7114, + "step": 24620 + }, + { + "epoch": 1.74, + "learning_rate": 2.0121802818560716e-06, + "loss": 0.7295, + "step": 24630 + }, + { + "epoch": 1.74, + "learning_rate": 2.0012683010974366e-06, + "loss": 0.7161, + "step": 24640 + }, + { + "epoch": 1.74, + "learning_rate": 1.9903847547150495e-06, + "loss": 0.6945, + "step": 24650 + }, + { + "epoch": 1.75, + "learning_rate": 1.979529656164736e-06, + "loss": 0.7025, + "step": 24660 + }, + { + "epoch": 1.75, + "learning_rate": 1.9687030188671517e-06, + "loss": 0.7047, + "step": 24670 + }, + { + "epoch": 1.75, + "learning_rate": 1.957904856207754e-06, + "loss": 0.6837, + "step": 24680 + }, + { + "epoch": 1.75, + "learning_rate": 1.9471351815367995e-06, + "loss": 0.7309, + "step": 24690 + }, + { + "epoch": 1.75, + "learning_rate": 1.9363940081693287e-06, + "loss": 0.7112, + "step": 24700 + }, + { + "epoch": 1.75, + "learning_rate": 1.92568134938515e-06, + "loss": 0.698, + "step": 24710 + }, + { + "epoch": 1.75, + "learning_rate": 1.914997218428796e-06, + "loss": 0.7365, + "step": 24720 + }, + { + "epoch": 1.75, + "learning_rate": 1.9043416285095568e-06, + "loss": 0.6988, + "step": 24730 + }, + { + "epoch": 1.75, + "learning_rate": 1.8937145928014073e-06, + "loss": 0.7022, + "step": 24740 + }, + { + "epoch": 1.75, + "learning_rate": 1.883116124443049e-06, + "loss": 0.7207, + "step": 24750 + }, + { + "epoch": 1.75, + "learning_rate": 1.8725462365378366e-06, + "loss": 0.7202, + "step": 24760 + }, + { + "epoch": 1.75, + "learning_rate": 1.862004942153811e-06, + "loss": 0.7263, + "step": 24770 + }, + { + "epoch": 1.75, + "learning_rate": 1.85149225432365e-06, + "loss": 0.7123, + "step": 24780 + }, + { + "epoch": 1.75, + "learning_rate": 1.8410081860446682e-06, + "loss": 0.7025, + "step": 24790 + }, + { + "epoch": 1.76, + "learning_rate": 1.8305527502787912e-06, + "loss": 0.7225, + "step": 24800 + }, + { + "epoch": 1.76, + "learning_rate": 1.8201259599525567e-06, + "loss": 0.6956, + "step": 24810 + }, + { + "epoch": 1.76, + "learning_rate": 1.8097278279570696e-06, + "loss": 0.716, + "step": 24820 + }, + { + "epoch": 1.76, + "learning_rate": 1.7993583671480213e-06, + "loss": 0.6895, + "step": 24830 + }, + { + "epoch": 1.76, + "learning_rate": 1.7890175903456403e-06, + "loss": 0.7089, + "step": 24840 + }, + { + "epoch": 1.76, + "learning_rate": 1.778705510334705e-06, + "loss": 0.7165, + "step": 24850 + }, + { + "epoch": 1.76, + "learning_rate": 1.7684221398645007e-06, + "loss": 0.6951, + "step": 24860 + }, + { + "epoch": 1.76, + "learning_rate": 1.7581674916488267e-06, + "loss": 0.6925, + "step": 24870 + }, + { + "epoch": 1.76, + "learning_rate": 1.7479415783659774e-06, + "loss": 0.724, + "step": 24880 + }, + { + "epoch": 1.76, + "learning_rate": 1.7377444126587094e-06, + "loss": 0.6585, + "step": 24890 + }, + { + "epoch": 1.76, + "learning_rate": 1.7275760071342379e-06, + "loss": 0.7301, + "step": 24900 + }, + { + "epoch": 1.76, + "learning_rate": 1.7174363743642257e-06, + "loss": 0.6777, + "step": 24910 + }, + { + "epoch": 1.76, + "learning_rate": 1.7073255268847676e-06, + "loss": 0.6788, + "step": 24920 + }, + { + "epoch": 1.76, + "learning_rate": 1.6972434771963586e-06, + "loss": 0.7013, + "step": 24930 + }, + { + "epoch": 1.77, + "learning_rate": 1.6871902377638915e-06, + "loss": 0.6847, + "step": 24940 + }, + { + "epoch": 1.77, + "learning_rate": 1.6771658210166436e-06, + "loss": 0.7232, + "step": 24950 + }, + { + "epoch": 1.77, + "learning_rate": 1.6671702393482646e-06, + "loss": 0.7024, + "step": 24960 + }, + { + "epoch": 1.77, + "learning_rate": 1.657203505116739e-06, + "loss": 0.674, + "step": 24970 + }, + { + "epoch": 1.77, + "learning_rate": 1.647265630644393e-06, + "loss": 0.7264, + "step": 24980 + }, + { + "epoch": 1.77, + "learning_rate": 1.6373566282178704e-06, + "loss": 0.703, + "step": 24990 + }, + { + "epoch": 1.77, + "learning_rate": 1.6274765100881272e-06, + "loss": 0.7121, + "step": 25000 + }, + { + "epoch": 1.77, + "learning_rate": 1.6176252884703924e-06, + "loss": 0.7126, + "step": 25010 + }, + { + "epoch": 1.77, + "learning_rate": 1.6078029755441926e-06, + "loss": 0.7127, + "step": 25020 + }, + { + "epoch": 1.77, + "learning_rate": 1.5980095834532832e-06, + "loss": 0.7084, + "step": 25030 + }, + { + "epoch": 1.77, + "learning_rate": 1.5882451243056928e-06, + "loss": 0.6787, + "step": 25040 + }, + { + "epoch": 1.77, + "learning_rate": 1.578509610173659e-06, + "loss": 0.7314, + "step": 25050 + }, + { + "epoch": 1.77, + "learning_rate": 1.5688030530936425e-06, + "loss": 0.7144, + "step": 25060 + }, + { + "epoch": 1.77, + "learning_rate": 1.559125465066294e-06, + "loss": 0.7208, + "step": 25070 + }, + { + "epoch": 1.78, + "learning_rate": 1.5494768580564618e-06, + "loss": 0.7474, + "step": 25080 + }, + { + "epoch": 1.78, + "learning_rate": 1.5398572439931591e-06, + "loss": 0.7082, + "step": 25090 + }, + { + "epoch": 1.78, + "learning_rate": 1.5302666347695477e-06, + "loss": 0.7083, + "step": 25100 + }, + { + "epoch": 1.78, + "learning_rate": 1.5207050422429286e-06, + "loss": 0.7155, + "step": 25110 + }, + { + "epoch": 1.78, + "learning_rate": 1.5111724782347375e-06, + "loss": 0.6739, + "step": 25120 + }, + { + "epoch": 1.78, + "learning_rate": 1.501668954530519e-06, + "loss": 0.7285, + "step": 25130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4921944828799105e-06, + "loss": 0.6637, + "step": 25140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4827490749966227e-06, + "loss": 0.6976, + "step": 25150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4733327425584504e-06, + "loss": 0.7068, + "step": 25160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4639454972072337e-06, + "loss": 0.6992, + "step": 25170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4545873505488471e-06, + "loss": 0.694, + "step": 25180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4452583141531888e-06, + "loss": 0.7144, + "step": 25190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4359583995541741e-06, + "loss": 0.7099, + "step": 25200 + }, + { + "epoch": 1.78, + "learning_rate": 1.426687618249714e-06, + "loss": 0.6898, + "step": 25210 + }, + { + "epoch": 1.79, + "learning_rate": 1.4174459817016927e-06, + "loss": 0.6764, + "step": 25220 + }, + { + "epoch": 1.79, + "learning_rate": 1.4082335013359593e-06, + "loss": 0.6904, + "step": 25230 + }, + { + "epoch": 1.79, + "learning_rate": 1.3990501885423251e-06, + "loss": 0.7081, + "step": 25240 + }, + { + "epoch": 1.79, + "learning_rate": 1.3898960546745443e-06, + "loss": 0.6962, + "step": 25250 + }, + { + "epoch": 1.79, + "learning_rate": 1.3807711110502802e-06, + "loss": 0.7231, + "step": 25260 + }, + { + "epoch": 1.79, + "learning_rate": 1.3716753689511142e-06, + "loss": 0.7064, + "step": 25270 + }, + { + "epoch": 1.79, + "learning_rate": 1.3626088396225284e-06, + "loss": 0.6942, + "step": 25280 + }, + { + "epoch": 1.79, + "learning_rate": 1.353571534273887e-06, + "loss": 0.7291, + "step": 25290 + }, + { + "epoch": 1.79, + "learning_rate": 1.3445634640784133e-06, + "loss": 0.6808, + "step": 25300 + }, + { + "epoch": 1.79, + "learning_rate": 1.3355846401732015e-06, + "loss": 0.6962, + "step": 25310 + }, + { + "epoch": 1.79, + "learning_rate": 1.3266350736591744e-06, + "loss": 0.736, + "step": 25320 + }, + { + "epoch": 1.79, + "learning_rate": 1.3177147756010893e-06, + "loss": 0.6821, + "step": 25330 + }, + { + "epoch": 1.79, + "learning_rate": 1.3088237570275165e-06, + "loss": 0.6935, + "step": 25340 + }, + { + "epoch": 1.79, + "learning_rate": 1.2999620289308263e-06, + "loss": 0.7366, + "step": 25350 + }, + { + "epoch": 1.8, + "learning_rate": 1.2911296022671716e-06, + "loss": 0.7145, + "step": 25360 + }, + { + "epoch": 1.8, + "learning_rate": 1.2823264879564838e-06, + "loss": 0.6949, + "step": 25370 + }, + { + "epoch": 1.8, + "learning_rate": 1.2735526968824575e-06, + "loss": 0.7115, + "step": 25380 + }, + { + "epoch": 1.8, + "learning_rate": 1.264808239892526e-06, + "loss": 0.7214, + "step": 25390 + }, + { + "epoch": 1.8, + "learning_rate": 1.2560931277978526e-06, + "loss": 0.7128, + "step": 25400 + }, + { + "epoch": 1.8, + "learning_rate": 1.2474073713733353e-06, + "loss": 0.7109, + "step": 25410 + }, + { + "epoch": 1.8, + "learning_rate": 1.238750981357567e-06, + "loss": 0.7233, + "step": 25420 + }, + { + "epoch": 1.8, + "learning_rate": 1.2301239684528342e-06, + "loss": 0.7049, + "step": 25430 + }, + { + "epoch": 1.8, + "learning_rate": 1.2215263433250995e-06, + "loss": 0.7242, + "step": 25440 + }, + { + "epoch": 1.8, + "learning_rate": 1.2129581166040043e-06, + "loss": 0.6809, + "step": 25450 + }, + { + "epoch": 1.8, + "learning_rate": 1.2044192988828362e-06, + "loss": 0.7136, + "step": 25460 + }, + { + "epoch": 1.8, + "learning_rate": 1.1959099007185226e-06, + "loss": 0.6874, + "step": 25470 + }, + { + "epoch": 1.8, + "learning_rate": 1.1874299326316147e-06, + "loss": 0.7122, + "step": 25480 + }, + { + "epoch": 1.8, + "learning_rate": 1.1789794051062815e-06, + "loss": 0.7186, + "step": 25490 + }, + { + "epoch": 1.8, + "learning_rate": 1.1705583285903043e-06, + "loss": 0.7135, + "step": 25500 + }, + { + "epoch": 1.81, + "learning_rate": 1.162166713495033e-06, + "loss": 0.6536, + "step": 25510 + }, + { + "epoch": 1.81, + "learning_rate": 1.1538045701954047e-06, + "loss": 0.6984, + "step": 25520 + }, + { + "epoch": 1.81, + "learning_rate": 1.1454719090299131e-06, + "loss": 0.7201, + "step": 25530 + }, + { + "epoch": 1.81, + "learning_rate": 1.1371687403006147e-06, + "loss": 0.6937, + "step": 25540 + }, + { + "epoch": 1.81, + "learning_rate": 1.1288950742730898e-06, + "loss": 0.7263, + "step": 25550 + }, + { + "epoch": 1.81, + "learning_rate": 1.1206509211764415e-06, + "loss": 0.6799, + "step": 25560 + }, + { + "epoch": 1.81, + "learning_rate": 1.1124362912032949e-06, + "loss": 0.6887, + "step": 25570 + }, + { + "epoch": 1.81, + "learning_rate": 1.10425119450977e-06, + "loss": 0.7054, + "step": 25580 + }, + { + "epoch": 1.81, + "learning_rate": 1.0960956412154777e-06, + "loss": 0.7283, + "step": 25590 + }, + { + "epoch": 1.81, + "learning_rate": 1.087969641403494e-06, + "loss": 0.7374, + "step": 25600 + }, + { + "epoch": 1.81, + "learning_rate": 1.0798732051203603e-06, + "loss": 0.7013, + "step": 25610 + }, + { + "epoch": 1.81, + "learning_rate": 1.0718063423760722e-06, + "loss": 0.6999, + "step": 25620 + }, + { + "epoch": 1.81, + "learning_rate": 1.0637690631440572e-06, + "loss": 0.7123, + "step": 25630 + }, + { + "epoch": 1.81, + "learning_rate": 1.0557613773611697e-06, + "loss": 0.6978, + "step": 25640 + }, + { + "epoch": 1.82, + "learning_rate": 1.0477832949276706e-06, + "loss": 0.7393, + "step": 25650 + }, + { + "epoch": 1.82, + "learning_rate": 1.0398348257072282e-06, + "loss": 0.727, + "step": 25660 + }, + { + "epoch": 1.82, + "learning_rate": 1.0319159795268984e-06, + "loss": 0.7193, + "step": 25670 + }, + { + "epoch": 1.82, + "learning_rate": 1.024026766177108e-06, + "loss": 0.7097, + "step": 25680 + }, + { + "epoch": 1.82, + "learning_rate": 1.0161671954116464e-06, + "loss": 0.7109, + "step": 25690 + }, + { + "epoch": 1.82, + "learning_rate": 1.0083372769476629e-06, + "loss": 0.7086, + "step": 25700 + }, + { + "epoch": 1.82, + "learning_rate": 1.0005370204656418e-06, + "loss": 0.7081, + "step": 25710 + }, + { + "epoch": 1.82, + "learning_rate": 9.927664356093908e-07, + "loss": 0.6914, + "step": 25720 + }, + { + "epoch": 1.82, + "learning_rate": 9.850255319860362e-07, + "loss": 0.6929, + "step": 25730 + }, + { + "epoch": 1.82, + "learning_rate": 9.773143191660116e-07, + "loss": 0.7269, + "step": 25740 + }, + { + "epoch": 1.82, + "learning_rate": 9.696328066830378e-07, + "loss": 0.7243, + "step": 25750 + }, + { + "epoch": 1.82, + "learning_rate": 9.61981004034121e-07, + "loss": 0.718, + "step": 25760 + }, + { + "epoch": 1.82, + "learning_rate": 9.54358920679524e-07, + "loss": 0.7295, + "step": 25770 + }, + { + "epoch": 1.82, + "learning_rate": 9.46766566042781e-07, + "loss": 0.7101, + "step": 25780 + }, + { + "epoch": 1.83, + "learning_rate": 9.392039495106642e-07, + "loss": 0.7296, + "step": 25790 + }, + { + "epoch": 1.83, + "learning_rate": 9.31671080433183e-07, + "loss": 0.7022, + "step": 25800 + }, + { + "epoch": 1.83, + "learning_rate": 9.241679681235572e-07, + "loss": 0.7167, + "step": 25810 + }, + { + "epoch": 1.83, + "learning_rate": 9.166946218582301e-07, + "loss": 0.7109, + "step": 25820 + }, + { + "epoch": 1.83, + "learning_rate": 9.092510508768387e-07, + "loss": 0.7036, + "step": 25830 + }, + { + "epoch": 1.83, + "learning_rate": 9.018372643822132e-07, + "loss": 0.7064, + "step": 25840 + }, + { + "epoch": 1.83, + "learning_rate": 8.944532715403408e-07, + "loss": 0.7124, + "step": 25850 + }, + { + "epoch": 1.83, + "learning_rate": 8.87099081480397e-07, + "loss": 0.7441, + "step": 25860 + }, + { + "epoch": 1.83, + "learning_rate": 8.797747032947001e-07, + "loss": 0.7099, + "step": 25870 + }, + { + "epoch": 1.83, + "learning_rate": 8.724801460387094e-07, + "loss": 0.7087, + "step": 25880 + }, + { + "epoch": 1.83, + "learning_rate": 8.652154187310218e-07, + "loss": 0.7032, + "step": 25890 + }, + { + "epoch": 1.83, + "learning_rate": 8.579805303533417e-07, + "loss": 0.7031, + "step": 25900 + }, + { + "epoch": 1.83, + "learning_rate": 8.507754898504943e-07, + "loss": 0.6833, + "step": 25910 + }, + { + "epoch": 1.83, + "learning_rate": 8.436003061304043e-07, + "loss": 0.7052, + "step": 25920 + }, + { + "epoch": 1.84, + "learning_rate": 8.364549880640671e-07, + "loss": 0.7, + "step": 25930 + }, + { + "epoch": 1.84, + "learning_rate": 8.293395444855662e-07, + "loss": 0.7127, + "step": 25940 + }, + { + "epoch": 1.84, + "learning_rate": 8.222539841920507e-07, + "loss": 0.709, + "step": 25950 + }, + { + "epoch": 1.84, + "learning_rate": 8.151983159437215e-07, + "loss": 0.6866, + "step": 25960 + }, + { + "epoch": 1.84, + "learning_rate": 8.081725484638176e-07, + "loss": 0.7142, + "step": 25970 + }, + { + "epoch": 1.84, + "learning_rate": 8.011766904386154e-07, + "loss": 0.7077, + "step": 25980 + }, + { + "epoch": 1.84, + "learning_rate": 7.942107505174102e-07, + "loss": 0.7226, + "step": 25990 + }, + { + "epoch": 1.84, + "learning_rate": 7.872747373125156e-07, + "loss": 0.7148, + "step": 26000 + }, + { + "epoch": 1.84, + "learning_rate": 7.80368659399236e-07, + "loss": 0.7326, + "step": 26010 + }, + { + "epoch": 1.84, + "learning_rate": 7.734925253158665e-07, + "loss": 0.7066, + "step": 26020 + }, + { + "epoch": 1.84, + "learning_rate": 7.666463435636873e-07, + "loss": 0.6938, + "step": 26030 + }, + { + "epoch": 1.84, + "learning_rate": 7.598301226069443e-07, + "loss": 0.6948, + "step": 26040 + }, + { + "epoch": 1.84, + "learning_rate": 7.53043870872841e-07, + "loss": 0.6797, + "step": 26050 + }, + { + "epoch": 1.84, + "learning_rate": 7.462875967515242e-07, + "loss": 0.7114, + "step": 26060 + }, + { + "epoch": 1.85, + "learning_rate": 7.395613085960873e-07, + "loss": 0.7184, + "step": 26070 + }, + { + "epoch": 1.85, + "learning_rate": 7.328650147225419e-07, + "loss": 0.6855, + "step": 26080 + }, + { + "epoch": 1.85, + "learning_rate": 7.261987234098238e-07, + "loss": 0.7092, + "step": 26090 + }, + { + "epoch": 1.85, + "learning_rate": 7.19562442899771e-07, + "loss": 0.717, + "step": 26100 + }, + { + "epoch": 1.85, + "learning_rate": 7.129561813971147e-07, + "loss": 0.7146, + "step": 26110 + }, + { + "epoch": 1.85, + "learning_rate": 7.06379947069477e-07, + "loss": 0.7148, + "step": 26120 + }, + { + "epoch": 1.85, + "learning_rate": 6.998337480473599e-07, + "loss": 0.688, + "step": 26130 + }, + { + "epoch": 1.85, + "learning_rate": 6.933175924241225e-07, + "loss": 0.7048, + "step": 26140 + }, + { + "epoch": 1.85, + "learning_rate": 6.868314882559845e-07, + "loss": 0.7307, + "step": 26150 + }, + { + "epoch": 1.85, + "learning_rate": 6.803754435620091e-07, + "loss": 0.721, + "step": 26160 + }, + { + "epoch": 1.85, + "learning_rate": 6.739494663241031e-07, + "loss": 0.7126, + "step": 26170 + }, + { + "epoch": 1.85, + "learning_rate": 6.675535644869891e-07, + "loss": 0.695, + "step": 26180 + }, + { + "epoch": 1.85, + "learning_rate": 6.611877459582083e-07, + "loss": 0.6855, + "step": 26190 + }, + { + "epoch": 1.85, + "learning_rate": 6.548520186081181e-07, + "loss": 0.7089, + "step": 26200 + }, + { + "epoch": 1.86, + "learning_rate": 6.485463902698635e-07, + "loss": 0.7166, + "step": 26210 + }, + { + "epoch": 1.86, + "learning_rate": 6.42270868739378e-07, + "loss": 0.6708, + "step": 26220 + }, + { + "epoch": 1.86, + "learning_rate": 6.360254617753719e-07, + "loss": 0.7116, + "step": 26230 + }, + { + "epoch": 1.86, + "learning_rate": 6.29810177099327e-07, + "loss": 0.727, + "step": 26240 + }, + { + "epoch": 1.86, + "learning_rate": 6.236250223954854e-07, + "loss": 0.7041, + "step": 26250 + }, + { + "epoch": 1.86, + "learning_rate": 6.174700053108329e-07, + "loss": 0.7394, + "step": 26260 + }, + { + "epoch": 1.86, + "learning_rate": 6.113451334550934e-07, + "loss": 0.696, + "step": 26270 + }, + { + "epoch": 1.86, + "learning_rate": 6.052504144007265e-07, + "loss": 0.725, + "step": 26280 + }, + { + "epoch": 1.86, + "learning_rate": 5.991858556829072e-07, + "loss": 0.7124, + "step": 26290 + }, + { + "epoch": 1.86, + "learning_rate": 5.93151464799535e-07, + "loss": 0.7376, + "step": 26300 + }, + { + "epoch": 1.86, + "learning_rate": 5.871472492111868e-07, + "loss": 0.7172, + "step": 26310 + }, + { + "epoch": 1.86, + "learning_rate": 5.811732163411549e-07, + "loss": 0.7413, + "step": 26320 + }, + { + "epoch": 1.86, + "learning_rate": 5.75229373575406e-07, + "loss": 0.7297, + "step": 26330 + }, + { + "epoch": 1.86, + "learning_rate": 5.693157282625872e-07, + "loss": 0.7237, + "step": 26340 + }, + { + "epoch": 1.87, + "learning_rate": 5.634322877140025e-07, + "loss": 0.6955, + "step": 26350 + }, + { + "epoch": 1.87, + "learning_rate": 5.575790592036139e-07, + "loss": 0.7184, + "step": 26360 + }, + { + "epoch": 1.87, + "learning_rate": 5.517560499680358e-07, + "loss": 0.7002, + "step": 26370 + }, + { + "epoch": 1.87, + "learning_rate": 5.459632672065229e-07, + "loss": 0.6985, + "step": 26380 + }, + { + "epoch": 1.87, + "learning_rate": 5.402007180809493e-07, + "loss": 0.7, + "step": 26390 + }, + { + "epoch": 1.87, + "learning_rate": 5.344684097158215e-07, + "loss": 0.6975, + "step": 26400 + }, + { + "epoch": 1.87, + "learning_rate": 5.287663491982481e-07, + "loss": 0.6832, + "step": 26410 + }, + { + "epoch": 1.87, + "learning_rate": 5.230945435779483e-07, + "loss": 0.689, + "step": 26420 + }, + { + "epoch": 1.87, + "learning_rate": 5.174529998672322e-07, + "loss": 0.7153, + "step": 26430 + }, + { + "epoch": 1.87, + "learning_rate": 5.118417250409957e-07, + "loss": 0.7424, + "step": 26440 + }, + { + "epoch": 1.87, + "learning_rate": 5.062607260367142e-07, + "loss": 0.6872, + "step": 26450 + }, + { + "epoch": 1.87, + "learning_rate": 5.007100097544293e-07, + "loss": 0.7121, + "step": 26460 + }, + { + "epoch": 1.87, + "learning_rate": 4.95189583056746e-07, + "loss": 0.7051, + "step": 26470 + }, + { + "epoch": 1.87, + "learning_rate": 4.896994527688104e-07, + "loss": 0.7067, + "step": 26480 + }, + { + "epoch": 1.88, + "learning_rate": 4.842396256783288e-07, + "loss": 0.7005, + "step": 26490 + }, + { + "epoch": 1.88, + "learning_rate": 4.788101085355267e-07, + "loss": 0.706, + "step": 26500 + }, + { + "epoch": 1.88, + "learning_rate": 4.7341090805317346e-07, + "loss": 0.7267, + "step": 26510 + }, + { + "epoch": 1.88, + "learning_rate": 4.68042030906532e-07, + "loss": 0.7193, + "step": 26520 + }, + { + "epoch": 1.88, + "learning_rate": 4.6270348373339545e-07, + "loss": 0.6997, + "step": 26530 + }, + { + "epoch": 1.88, + "learning_rate": 4.573952731340536e-07, + "loss": 0.7155, + "step": 26540 + }, + { + "epoch": 1.88, + "learning_rate": 4.5211740567129014e-07, + "loss": 0.6904, + "step": 26550 + }, + { + "epoch": 1.88, + "learning_rate": 4.468698878703631e-07, + "loss": 0.7309, + "step": 26560 + }, + { + "epoch": 1.88, + "learning_rate": 4.416527262190218e-07, + "loss": 0.6915, + "step": 26570 + }, + { + "epoch": 1.88, + "learning_rate": 4.3646592716748423e-07, + "loss": 0.7185, + "step": 26580 + }, + { + "epoch": 1.88, + "learning_rate": 4.3130949712842093e-07, + "loss": 0.695, + "step": 26590 + }, + { + "epoch": 1.88, + "learning_rate": 4.2618344247696284e-07, + "loss": 0.7061, + "step": 26600 + }, + { + "epoch": 1.88, + "learning_rate": 4.210877695506793e-07, + "loss": 0.7159, + "step": 26610 + }, + { + "epoch": 1.88, + "learning_rate": 4.1602248464958914e-07, + "loss": 0.7226, + "step": 26620 + }, + { + "epoch": 1.88, + "learning_rate": 4.109875940361302e-07, + "loss": 0.6967, + "step": 26630 + }, + { + "epoch": 1.89, + "learning_rate": 4.059831039351675e-07, + "loss": 0.6913, + "step": 26640 + }, + { + "epoch": 1.89, + "learning_rate": 4.010090205339767e-07, + "loss": 0.6682, + "step": 26650 + }, + { + "epoch": 1.89, + "learning_rate": 3.960653499822442e-07, + "loss": 0.7328, + "step": 26660 + }, + { + "epoch": 1.89, + "learning_rate": 3.911520983920558e-07, + "loss": 0.7028, + "step": 26670 + }, + { + "epoch": 1.89, + "learning_rate": 3.862692718378885e-07, + "loss": 0.6989, + "step": 26680 + }, + { + "epoch": 1.89, + "learning_rate": 3.8141687635659953e-07, + "loss": 0.6951, + "step": 26690 + }, + { + "epoch": 1.89, + "learning_rate": 3.7659491794742595e-07, + "loss": 0.7256, + "step": 26700 + }, + { + "epoch": 1.89, + "learning_rate": 3.718034025719769e-07, + "loss": 0.6671, + "step": 26710 + }, + { + "epoch": 1.89, + "learning_rate": 3.670423361542191e-07, + "loss": 0.7007, + "step": 26720 + }, + { + "epoch": 1.89, + "learning_rate": 3.623117245804719e-07, + "loss": 0.6817, + "step": 26730 + }, + { + "epoch": 1.89, + "learning_rate": 3.576115736994096e-07, + "loss": 0.7201, + "step": 26740 + }, + { + "epoch": 1.89, + "learning_rate": 3.5294188932203653e-07, + "loss": 0.7237, + "step": 26750 + }, + { + "epoch": 1.89, + "learning_rate": 3.4830267722170684e-07, + "loss": 0.6799, + "step": 26760 + }, + { + "epoch": 1.89, + "learning_rate": 3.436939431340769e-07, + "loss": 0.7261, + "step": 26770 + }, + { + "epoch": 1.9, + "learning_rate": 3.3911569275713594e-07, + "loss": 0.703, + "step": 26780 + }, + { + "epoch": 1.9, + "learning_rate": 3.3456793175118685e-07, + "loss": 0.6719, + "step": 26790 + }, + { + "epoch": 1.9, + "learning_rate": 3.300506657388347e-07, + "loss": 0.6846, + "step": 26800 + }, + { + "epoch": 1.9, + "learning_rate": 3.25563900304976e-07, + "loss": 0.6572, + "step": 26810 + }, + { + "epoch": 1.9, + "learning_rate": 3.211076409968039e-07, + "loss": 0.722, + "step": 26820 + }, + { + "epoch": 1.9, + "learning_rate": 3.1668189332379194e-07, + "loss": 0.7193, + "step": 26830 + }, + { + "epoch": 1.9, + "learning_rate": 3.1228666275769925e-07, + "loss": 0.7138, + "step": 26840 + }, + { + "epoch": 1.9, + "learning_rate": 3.079219547325429e-07, + "loss": 0.7114, + "step": 26850 + }, + { + "epoch": 1.9, + "learning_rate": 3.03587774644612e-07, + "loss": 0.6791, + "step": 26860 + }, + { + "epoch": 1.9, + "learning_rate": 2.9928412785244777e-07, + "loss": 0.6882, + "step": 26870 + }, + { + "epoch": 1.9, + "learning_rate": 2.950110196768496e-07, + "loss": 0.7039, + "step": 26880 + }, + { + "epoch": 1.9, + "learning_rate": 2.9076845540084993e-07, + "loss": 0.6818, + "step": 26890 + }, + { + "epoch": 1.9, + "learning_rate": 2.8655644026972514e-07, + "loss": 0.7056, + "step": 26900 + }, + { + "epoch": 1.9, + "learning_rate": 2.823749794909819e-07, + "loss": 0.7029, + "step": 26910 + }, + { + "epoch": 1.91, + "learning_rate": 2.782240782343487e-07, + "loss": 0.7107, + "step": 26920 + }, + { + "epoch": 1.91, + "learning_rate": 2.741037416317732e-07, + "loss": 0.7263, + "step": 26930 + }, + { + "epoch": 1.91, + "learning_rate": 2.700139747774166e-07, + "loss": 0.6971, + "step": 26940 + }, + { + "epoch": 1.91, + "learning_rate": 2.6595478272763985e-07, + "loss": 0.7139, + "step": 26950 + }, + { + "epoch": 1.91, + "learning_rate": 2.6192617050100897e-07, + "loss": 0.6651, + "step": 26960 + }, + { + "epoch": 1.91, + "learning_rate": 2.579281430782815e-07, + "loss": 0.7181, + "step": 26970 + }, + { + "epoch": 1.91, + "learning_rate": 2.539607054023979e-07, + "loss": 0.6995, + "step": 26980 + }, + { + "epoch": 1.91, + "learning_rate": 2.50023862378479e-07, + "loss": 0.699, + "step": 26990 + }, + { + "epoch": 1.91, + "learning_rate": 2.4611761887382844e-07, + "loss": 0.718, + "step": 27000 + }, + { + "epoch": 1.91, + "learning_rate": 2.4224197971791094e-07, + "loss": 0.6785, + "step": 27010 + }, + { + "epoch": 1.91, + "learning_rate": 2.3839694970234917e-07, + "loss": 0.7222, + "step": 27020 + }, + { + "epoch": 1.91, + "learning_rate": 2.3458253358093485e-07, + "loss": 0.6785, + "step": 27030 + }, + { + "epoch": 1.91, + "learning_rate": 2.3079873606960122e-07, + "loss": 0.6973, + "step": 27040 + }, + { + "epoch": 1.91, + "learning_rate": 2.27045561846434e-07, + "loss": 0.7146, + "step": 27050 + }, + { + "epoch": 1.92, + "learning_rate": 2.2332301555164638e-07, + "loss": 0.7284, + "step": 27060 + }, + { + "epoch": 1.92, + "learning_rate": 2.1963110178759017e-07, + "loss": 0.702, + "step": 27070 + }, + { + "epoch": 1.92, + "learning_rate": 2.159698251187531e-07, + "loss": 0.7291, + "step": 27080 + }, + { + "epoch": 1.92, + "learning_rate": 2.1233919007173086e-07, + "loss": 0.7087, + "step": 27090 + }, + { + "epoch": 1.92, + "learning_rate": 2.0873920113524958e-07, + "loss": 0.7054, + "step": 27100 + }, + { + "epoch": 1.92, + "learning_rate": 2.0516986276013227e-07, + "loss": 0.6835, + "step": 27110 + }, + { + "epoch": 1.92, + "learning_rate": 2.0163117935931564e-07, + "loss": 0.7169, + "step": 27120 + }, + { + "epoch": 1.92, + "learning_rate": 1.9812315530783886e-07, + "loss": 0.6897, + "step": 27130 + }, + { + "epoch": 1.92, + "learning_rate": 1.9464579494282708e-07, + "loss": 0.7121, + "step": 27140 + }, + { + "epoch": 1.92, + "learning_rate": 1.911991025634996e-07, + "loss": 0.7111, + "step": 27150 + }, + { + "epoch": 1.92, + "learning_rate": 1.8778308243115884e-07, + "loss": 0.7009, + "step": 27160 + }, + { + "epoch": 1.92, + "learning_rate": 1.8439773876918753e-07, + "loss": 0.6859, + "step": 27170 + }, + { + "epoch": 1.92, + "learning_rate": 1.810430757630377e-07, + "loss": 0.7101, + "step": 27180 + }, + { + "epoch": 1.92, + "learning_rate": 1.7771909756023054e-07, + "loss": 0.6874, + "step": 27190 + }, + { + "epoch": 1.93, + "learning_rate": 1.7442580827035094e-07, + "loss": 0.7033, + "step": 27200 + }, + { + "epoch": 1.93, + "learning_rate": 1.7116321196504749e-07, + "loss": 0.66, + "step": 27210 + }, + { + "epoch": 1.93, + "learning_rate": 1.679313126780102e-07, + "loss": 0.7304, + "step": 27220 + }, + { + "epoch": 1.93, + "learning_rate": 1.6473011440498454e-07, + "loss": 0.6865, + "step": 27230 + }, + { + "epoch": 1.93, + "learning_rate": 1.615596211037601e-07, + "loss": 0.6943, + "step": 27240 + }, + { + "epoch": 1.93, + "learning_rate": 1.5841983669415695e-07, + "loss": 0.6896, + "step": 27250 + }, + { + "epoch": 1.93, + "learning_rate": 1.5531076505803655e-07, + "loss": 0.7158, + "step": 27260 + }, + { + "epoch": 1.93, + "learning_rate": 1.5223241003928524e-07, + "loss": 0.7259, + "step": 27270 + }, + { + "epoch": 1.93, + "learning_rate": 1.491847754438086e-07, + "loss": 0.715, + "step": 27280 + }, + { + "epoch": 1.93, + "learning_rate": 1.461678650395426e-07, + "loss": 0.6879, + "step": 27290 + }, + { + "epoch": 1.93, + "learning_rate": 1.4318168255642585e-07, + "loss": 0.6827, + "step": 27300 + }, + { + "epoch": 1.93, + "learning_rate": 1.4022623168641336e-07, + "loss": 0.7186, + "step": 27310 + }, + { + "epoch": 1.93, + "learning_rate": 1.3730151608346285e-07, + "loss": 0.692, + "step": 27320 + }, + { + "epoch": 1.93, + "learning_rate": 1.344075393635319e-07, + "loss": 0.7285, + "step": 27330 + }, + { + "epoch": 1.94, + "learning_rate": 1.3154430510457506e-07, + "loss": 0.7337, + "step": 27340 + }, + { + "epoch": 1.94, + "learning_rate": 1.2871181684654399e-07, + "loss": 0.6814, + "step": 27350 + }, + { + "epoch": 1.94, + "learning_rate": 1.2591007809136247e-07, + "loss": 0.7109, + "step": 27360 + }, + { + "epoch": 1.94, + "learning_rate": 1.2313909230295684e-07, + "loss": 0.6875, + "step": 27370 + }, + { + "epoch": 1.94, + "learning_rate": 1.2039886290722003e-07, + "loss": 0.6764, + "step": 27380 + }, + { + "epoch": 1.94, + "learning_rate": 1.1768939329201978e-07, + "loss": 0.6924, + "step": 27390 + }, + { + "epoch": 1.94, + "learning_rate": 1.1501068680719595e-07, + "loss": 0.7136, + "step": 27400 + }, + { + "epoch": 1.94, + "learning_rate": 1.1236274676456049e-07, + "loss": 0.7331, + "step": 27410 + }, + { + "epoch": 1.94, + "learning_rate": 1.1000590874080952e-07, + "loss": 0.737, + "step": 27420 + }, + { + "epoch": 1.94, + "learning_rate": 1.0741643392611355e-07, + "loss": 0.6887, + "step": 27430 + }, + { + "epoch": 1.94, + "learning_rate": 1.0485773494272321e-07, + "loss": 0.7165, + "step": 27440 + }, + { + "epoch": 1.94, + "learning_rate": 1.023298149540719e-07, + "loss": 0.6757, + "step": 27450 + }, + { + "epoch": 1.94, + "learning_rate": 9.983267708554289e-08, + "loss": 0.6937, + "step": 27460 + }, + { + "epoch": 1.94, + "learning_rate": 9.736632442446103e-08, + "loss": 0.6886, + "step": 27470 + }, + { + "epoch": 1.95, + "learning_rate": 9.493076002008994e-08, + "loss": 0.7121, + "step": 27480 + }, + { + "epoch": 1.95, + "learning_rate": 9.252598688362924e-08, + "loss": 0.7186, + "step": 27490 + }, + { + "epoch": 1.95, + "learning_rate": 9.0152007988209e-08, + "loss": 0.7159, + "step": 27500 + }, + { + "epoch": 1.95, + "learning_rate": 8.780882626888421e-08, + "loss": 0.716, + "step": 27510 + }, + { + "epoch": 1.95, + "learning_rate": 8.572629670220156e-08, + "loss": 0.7589, + "step": 27520 + }, + { + "epoch": 1.95, + "learning_rate": 8.34416375671776e-08, + "loss": 0.7078, + "step": 27530 + }, + { + "epoch": 1.95, + "learning_rate": 8.118778390458492e-08, + "loss": 0.7089, + "step": 27540 + }, + { + "epoch": 1.95, + "learning_rate": 7.89647385009723e-08, + "loss": 0.723, + "step": 27550 + }, + { + "epoch": 1.95, + "learning_rate": 7.677250410478287e-08, + "loss": 0.7296, + "step": 27560 + }, + { + "epoch": 1.95, + "learning_rate": 7.46110834263819e-08, + "loss": 0.7196, + "step": 27570 + }, + { + "epoch": 1.95, + "learning_rate": 7.248047913802902e-08, + "loss": 0.7115, + "step": 27580 + }, + { + "epoch": 1.95, + "learning_rate": 7.038069387388658e-08, + "loss": 0.7485, + "step": 27590 + }, + { + "epoch": 1.95, + "learning_rate": 6.83117302300168e-08, + "loss": 0.6943, + "step": 27600 + }, + { + "epoch": 1.95, + "learning_rate": 6.627359076437357e-08, + "loss": 0.669, + "step": 27610 + }, + { + "epoch": 1.96, + "learning_rate": 6.426627799679674e-08, + "loss": 0.7157, + "step": 27620 + }, + { + "epoch": 1.96, + "learning_rate": 6.228979440902339e-08, + "loss": 0.6938, + "step": 27630 + }, + { + "epoch": 1.96, + "learning_rate": 6.034414244466547e-08, + "loss": 0.7006, + "step": 27640 + }, + { + "epoch": 1.96, + "learning_rate": 5.842932450922656e-08, + "loss": 0.6623, + "step": 27650 + }, + { + "epoch": 1.96, + "learning_rate": 5.6545342970076855e-08, + "loss": 0.6881, + "step": 27660 + }, + { + "epoch": 1.96, + "learning_rate": 5.4692200156472583e-08, + "loss": 0.6943, + "step": 27670 + }, + { + "epoch": 1.96, + "learning_rate": 5.286989835953382e-08, + "loss": 0.7158, + "step": 27680 + }, + { + "epoch": 1.96, + "learning_rate": 5.107843983226113e-08, + "loss": 0.7247, + "step": 27690 + }, + { + "epoch": 1.96, + "learning_rate": 4.931782678951058e-08, + "loss": 0.7313, + "step": 27700 + }, + { + "epoch": 1.96, + "learning_rate": 4.7588061408013195e-08, + "loss": 0.7065, + "step": 27710 + }, + { + "epoch": 1.96, + "learning_rate": 4.588914582635273e-08, + "loss": 0.7159, + "step": 27720 + }, + { + "epoch": 1.96, + "learning_rate": 4.422108214497678e-08, + "loss": 0.7211, + "step": 27730 + }, + { + "epoch": 1.96, + "learning_rate": 4.2583872426185665e-08, + "loss": 0.7073, + "step": 27740 + }, + { + "epoch": 1.96, + "learning_rate": 4.097751869414079e-08, + "loss": 0.7067, + "step": 27750 + }, + { + "epoch": 1.96, + "learning_rate": 3.940202293484518e-08, + "loss": 0.7176, + "step": 27760 + }, + { + "epoch": 1.97, + "learning_rate": 3.785738709616016e-08, + "loss": 0.7229, + "step": 27770 + }, + { + "epoch": 1.97, + "learning_rate": 3.634361308778866e-08, + "loss": 0.6988, + "step": 27780 + }, + { + "epoch": 1.97, + "learning_rate": 3.486070278127529e-08, + "loss": 0.7275, + "step": 27790 + }, + { + "epoch": 1.97, + "learning_rate": 3.3408658010011805e-08, + "loss": 0.6845, + "step": 27800 + }, + { + "epoch": 1.97, + "learning_rate": 3.198748056922607e-08, + "loss": 0.733, + "step": 27810 + }, + { + "epoch": 1.97, + "learning_rate": 3.0597172215982014e-08, + "loss": 0.7463, + "step": 27820 + }, + { + "epoch": 1.97, + "learning_rate": 2.9237734669185223e-08, + "loss": 0.725, + "step": 27830 + }, + { + "epoch": 1.97, + "learning_rate": 2.7909169609571796e-08, + "loss": 0.7233, + "step": 27840 + }, + { + "epoch": 1.97, + "learning_rate": 2.6611478679700043e-08, + "loss": 0.7196, + "step": 27850 + }, + { + "epoch": 1.97, + "learning_rate": 2.5344663483967135e-08, + "loss": 0.7174, + "step": 27860 + }, + { + "epoch": 1.97, + "learning_rate": 2.4108725588595227e-08, + "loss": 0.6863, + "step": 27870 + }, + { + "epoch": 1.97, + "learning_rate": 2.290366652163145e-08, + "loss": 0.683, + "step": 27880 + }, + { + "epoch": 1.97, + "learning_rate": 2.1729487772945145e-08, + "loss": 0.7095, + "step": 27890 + }, + { + "epoch": 1.97, + "learning_rate": 2.0586190794222303e-08, + "loss": 0.6825, + "step": 27900 + }, + { + "epoch": 1.98, + "learning_rate": 1.947377699897668e-08, + "loss": 0.7083, + "step": 27910 + }, + { + "epoch": 1.98, + "learning_rate": 1.8392247762535898e-08, + "loss": 0.7009, + "step": 27920 + }, + { + "epoch": 1.98, + "learning_rate": 1.734160442204147e-08, + "loss": 0.6894, + "step": 27930 + }, + { + "epoch": 1.98, + "learning_rate": 1.6321848276454332e-08, + "loss": 0.731, + "step": 27940 + }, + { + "epoch": 1.98, + "learning_rate": 1.5332980586543756e-08, + "loss": 0.6969, + "step": 27950 + }, + { + "epoch": 1.98, + "learning_rate": 1.4375002574890106e-08, + "loss": 0.7128, + "step": 27960 + }, + { + "epoch": 1.98, + "learning_rate": 1.3447915425890411e-08, + "loss": 0.6867, + "step": 27970 + }, + { + "epoch": 1.98, + "learning_rate": 1.2551720285738922e-08, + "loss": 0.7137, + "step": 27980 + }, + { + "epoch": 1.98, + "learning_rate": 1.1686418262443766e-08, + "loss": 0.6866, + "step": 27990 + }, + { + "epoch": 1.98, + "learning_rate": 1.0852010425818627e-08, + "loss": 0.7063, + "step": 28000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 6.815844041867919e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-28000/training_args.bin b/checkpoint-28000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-28000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-3000/README.md b/checkpoint-3000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-3000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-3000/adapter_config.json b/checkpoint-3000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-3000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-3000/adapter_model.bin b/checkpoint-3000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..afb0851ae10802e87d74556fc6962f4f38e0f2d4 --- /dev/null +++ b/checkpoint-3000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e96293450f7ca416f6ece3c033b5449a218ffc0d062f3e9e28c3c208bb70d657 +size 16821197 diff --git a/checkpoint-3000/finetuning_args.json b/checkpoint-3000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-3000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-3000/optimizer.pt b/checkpoint-3000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..e92e58e870c3386c7d8a9e8d39416299ab782cb5 --- /dev/null +++ b/checkpoint-3000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:940f32213d297a0b46470915f3a86b02fa20a4a0cd1d4697c077526f292f9404 +size 33661637 diff --git a/checkpoint-3000/rng_state.pth b/checkpoint-3000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..611bd9f6cde83f8abc8f47953ac44358833c37d7 --- /dev/null +++ b/checkpoint-3000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85b4e491b60c0e83c1b6d116d6b6b2e9c2a8d6fe38003cb8f5b9454380c7ced1 +size 18663 diff --git a/checkpoint-3000/scheduler.pt b/checkpoint-3000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..3ea5419d46396af856abba81d716d532c435df15 --- /dev/null +++ b/checkpoint-3000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:519e5ed6c75222ee0bbbca12b6020c2d298be39949ee9f1295e15bd825b58d26 +size 627 diff --git a/checkpoint-3000/trainer_state.json b/checkpoint-3000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..53f5242773d1b8c173a3d7036bf942e95bf73b94 --- /dev/null +++ b/checkpoint-3000/trainer_state.json @@ -0,0 +1,1816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.21234803843499495, + "global_step": 3000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 7.302283371374182e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-3000/training_args.bin b/checkpoint-3000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-3000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-4000/README.md b/checkpoint-4000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-4000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-4000/adapter_config.json b/checkpoint-4000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-4000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-4000/adapter_model.bin b/checkpoint-4000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..fb9ea89c06764dbd3218c4b2b1b71ca20c494a04 --- /dev/null +++ b/checkpoint-4000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:37715395cffa291b8228c729383efd599cf4629220e348a8976fa840a104a5c0 +size 16821197 diff --git a/checkpoint-4000/finetuning_args.json b/checkpoint-4000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-4000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-4000/optimizer.pt b/checkpoint-4000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..c1ad3f9ec780f8fae4d707b5a933b8b36ea3945b --- /dev/null +++ b/checkpoint-4000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9a5951a643a2d804f35a01938e125b422dba701e0cc77462081921c9f593282c +size 33661637 diff --git a/checkpoint-4000/rng_state.pth b/checkpoint-4000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..cab40de47fde1777f54c3fec0b0f6ed55126df40 --- /dev/null +++ b/checkpoint-4000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8bab576cdd2d8c58c7e895dd333946d6595c13e99e5dd1076a30042b9281f92e +size 18663 diff --git a/checkpoint-4000/scheduler.pt b/checkpoint-4000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..10ff3f2cb081fe22e6f29b6e018c2606740db7f1 --- /dev/null +++ b/checkpoint-4000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6552a0dc8fa0b4a9358e2c8e00df74a77f2578023054ad347a79c8ce2e914962 +size 627 diff --git a/checkpoint-4000/trainer_state.json b/checkpoint-4000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..f3244bfa18c08b643ef369d8f9674acac9ec8cff --- /dev/null +++ b/checkpoint-4000/trainer_state.json @@ -0,0 +1,2416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.2831307179133266, + "global_step": 4000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 9.734512680789606e+17, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-4000/training_args.bin b/checkpoint-4000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-4000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-5000/README.md b/checkpoint-5000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-5000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-5000/adapter_config.json b/checkpoint-5000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-5000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-5000/adapter_model.bin b/checkpoint-5000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..e8514cf196b8761de96eb9b2e8e5ac8e8261e196 --- /dev/null +++ b/checkpoint-5000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8483cbdbef059b2fe0940f357d6912ac9c13a7391fe9230a7814754c22c25a55 +size 16821197 diff --git a/checkpoint-5000/finetuning_args.json b/checkpoint-5000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-5000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-5000/optimizer.pt b/checkpoint-5000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..f17cc42a42c9226f5c5e3ebc0bedd189a70dc9a5 --- /dev/null +++ b/checkpoint-5000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cee481ba47821eed09335acbd20956672a270403b1437637f7a9bfdeaf97d2ee +size 33661637 diff --git a/checkpoint-5000/rng_state.pth b/checkpoint-5000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..dc5c5433645f76c4cdf77326c09f79972c8feca6 --- /dev/null +++ b/checkpoint-5000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:20bc0b6e3b2c00f7835d5cd6eb55e8b5587389b1af94e8cc387a6935c8d8bd72 +size 18663 diff --git a/checkpoint-5000/scheduler.pt b/checkpoint-5000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..67b57827289a46d76fbb5a4f834b952548b06add --- /dev/null +++ b/checkpoint-5000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b91a269db70ef77ae8e58adb7bd69cfec71f7abe8cb557f11062905fbd4b1f0d +size 627 diff --git a/checkpoint-5000/trainer_state.json b/checkpoint-5000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5e833743ff9aa3478f4a1f3e4530bb91f60f297e --- /dev/null +++ b/checkpoint-5000/trainer_state.json @@ -0,0 +1,3016 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.3539133973916583, + "global_step": 5000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 1.2173968916925645e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-5000/training_args.bin b/checkpoint-5000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-5000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-6000/README.md b/checkpoint-6000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-6000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-6000/adapter_config.json b/checkpoint-6000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-6000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-6000/adapter_model.bin b/checkpoint-6000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..37e60b40bc52266cf08a16a342cfba96c9a26333 --- /dev/null +++ b/checkpoint-6000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dabc17807abd82eda1233467224b81be5dcf92542a8844971ef79bb8ba761ba7 +size 16821197 diff --git a/checkpoint-6000/finetuning_args.json b/checkpoint-6000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-6000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-6000/optimizer.pt b/checkpoint-6000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..0ed37d61fd4bf727156391b02565c470ca8c9d1a --- /dev/null +++ b/checkpoint-6000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bce4a2e424a09dd36bf84fe62c28c7e8f1d63d9b900ac3e91cac18a37f5b268d +size 33661637 diff --git a/checkpoint-6000/rng_state.pth b/checkpoint-6000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..7b141a1fc264d08cc02907fc250612ef7e9e18d2 --- /dev/null +++ b/checkpoint-6000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd504c922ddee92afdd3bb8c7794fbc496d5c62f16ecaa4be4040158a63b2927 +size 18663 diff --git a/checkpoint-6000/scheduler.pt b/checkpoint-6000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..b59f8b3598defafe64f0a5cda37dbf4c061704e9 --- /dev/null +++ b/checkpoint-6000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:67c39944ec75b4388778b7b258ed0ff059d8a9651c41060ef2f0261a83305b5b +size 627 diff --git a/checkpoint-6000/trainer_state.json b/checkpoint-6000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..5bb2a9efab71f5860d926ca75e72e28ba154401f --- /dev/null +++ b/checkpoint-6000/trainer_state.json @@ -0,0 +1,3616 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4246960768699899, + "global_step": 6000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 1.4610931422572052e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-6000/training_args.bin b/checkpoint-6000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-6000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-7000/README.md b/checkpoint-7000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-7000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-7000/adapter_config.json b/checkpoint-7000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-7000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-7000/adapter_model.bin b/checkpoint-7000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..dcb15b1a0f92798776bd2ded3cfe6686661980b0 --- /dev/null +++ b/checkpoint-7000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d18929b16616098cc0658d4389b12f929a681e742d53f885c55a1766a82a9c85 +size 16821197 diff --git a/checkpoint-7000/finetuning_args.json b/checkpoint-7000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-7000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-7000/optimizer.pt b/checkpoint-7000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..776fed165c0d5f15f4dacaaa9ed813fa04a7e07e --- /dev/null +++ b/checkpoint-7000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bf6a66aa524ba9391d41151f22b6076e8e7605f44cff6327c003ca48821bd598 +size 33661637 diff --git a/checkpoint-7000/rng_state.pth b/checkpoint-7000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..fbb8f59aa3d1547247ca3eb974d6b0885fd26500 --- /dev/null +++ b/checkpoint-7000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:95050e29eaa9231fea44586cd7f354122cc88cb75f5d0fa259c2f0346e288bcc +size 18663 diff --git a/checkpoint-7000/scheduler.pt b/checkpoint-7000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a5447cf44982c8075f7ed019b19d8542a7047f04 --- /dev/null +++ b/checkpoint-7000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ba99ea746a7df616f56584e60e11718ee2229d82d47170fdc1c762f6b227462a +size 627 diff --git a/checkpoint-7000/trainer_state.json b/checkpoint-7000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ccd9f0ec8498e1872879a707704060e24c544d82 --- /dev/null +++ b/checkpoint-7000/trainer_state.json @@ -0,0 +1,4216 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.49547875634832156, + "global_step": 7000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 1.7046928827592212e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-7000/training_args.bin b/checkpoint-7000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-7000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-8000/README.md b/checkpoint-8000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-8000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-8000/adapter_config.json b/checkpoint-8000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-8000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-8000/adapter_model.bin b/checkpoint-8000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad52aa911b39683e4a1f813c09b561602ba659d6 --- /dev/null +++ b/checkpoint-8000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fe9c0de1b51381586bfde8727e1507a539f31847071a7ded3362582c9d96806c +size 16821197 diff --git a/checkpoint-8000/finetuning_args.json b/checkpoint-8000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-8000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-8000/optimizer.pt b/checkpoint-8000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..a3ac97476dc745a08416c70c134f62dfe98704f2 --- /dev/null +++ b/checkpoint-8000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3da82a62f5cea5c64ffa655f0942e901a9900c2ea5f63b8b474fafddadc2aae3 +size 33661637 diff --git a/checkpoint-8000/rng_state.pth b/checkpoint-8000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..50c94e12e6276e8842bf0fcb158e4366461ee475 --- /dev/null +++ b/checkpoint-8000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1100768a391e122298f37c7c9097582fdea6dd7517b8dc52a582af501cfb3271 +size 18663 diff --git a/checkpoint-8000/scheduler.pt b/checkpoint-8000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..cd11154850717e1b385b8b1923b7bcfcc2e24c64 --- /dev/null +++ b/checkpoint-8000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a9a07642cc803ae705b1cbcb88efcc6706f4c9f4325a9b06838b32fb388bb818 +size 627 diff --git a/checkpoint-8000/trainer_state.json b/checkpoint-8000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..ca1203928700622ad47d692612a6b6d2b950e78b --- /dev/null +++ b/checkpoint-8000/trainer_state.json @@ -0,0 +1,4816 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.5662614358266532, + "global_step": 8000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 1.9469341967777792e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-8000/training_args.bin b/checkpoint-8000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-8000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/checkpoint-9000/README.md b/checkpoint-9000/README.md new file mode 100644 index 0000000000000000000000000000000000000000..97b11d22ef295d896f095aca16f96b41531e9ed7 --- /dev/null +++ b/checkpoint-9000/README.md @@ -0,0 +1,20 @@ +--- +library_name: peft +--- +## Training procedure + + +The following `bitsandbytes` quantization config was used during training: +- load_in_8bit: False +- load_in_4bit: True +- llm_int8_threshold: 6.0 +- llm_int8_skip_modules: None +- llm_int8_enable_fp32_cpu_offload: False +- llm_int8_has_fp16_weight: False +- bnb_4bit_quant_type: nf4 +- bnb_4bit_use_double_quant: True +- bnb_4bit_compute_dtype: float16 +### Framework versions + + +- PEFT 0.4.0 diff --git a/checkpoint-9000/adapter_config.json b/checkpoint-9000/adapter_config.json new file mode 100644 index 0000000000000000000000000000000000000000..5a626b5a4361e575a3b10980e75841d933625faf --- /dev/null +++ b/checkpoint-9000/adapter_config.json @@ -0,0 +1,21 @@ +{ + "auto_mapping": null, + "base_model_name_or_path": "./Llama-2-7b-chat-hf", + "bias": "none", + "fan_in_fan_out": false, + "inference_mode": true, + "init_lora_weights": true, + "layers_pattern": null, + "layers_to_transform": null, + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "modules_to_save": null, + "peft_type": "LORA", + "r": 8, + "revision": null, + "target_modules": [ + "q_proj", + "v_proj" + ], + "task_type": "CAUSAL_LM" +} \ No newline at end of file diff --git a/checkpoint-9000/adapter_model.bin b/checkpoint-9000/adapter_model.bin new file mode 100644 index 0000000000000000000000000000000000000000..50cf0a39ab1a2fd782a9864f9a4995ff2a341597 --- /dev/null +++ b/checkpoint-9000/adapter_model.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f7a14d58345cc2d57152835384880fe3d3f4b87f491aa84d58c75ff47e7f6d72 +size 16821197 diff --git a/checkpoint-9000/finetuning_args.json b/checkpoint-9000/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/checkpoint-9000/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/checkpoint-9000/optimizer.pt b/checkpoint-9000/optimizer.pt new file mode 100644 index 0000000000000000000000000000000000000000..b7d36b378d0f3fb0904d8f02641cd4fc6bc2ffde --- /dev/null +++ b/checkpoint-9000/optimizer.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d5d35bdffe2c0babe12a3ed28a694e9edb608f16b29f7e810783d7933179b77 +size 33661637 diff --git a/checkpoint-9000/rng_state.pth b/checkpoint-9000/rng_state.pth new file mode 100644 index 0000000000000000000000000000000000000000..0d974663c620c76d58c2dc5a1f306da4680c9c9e --- /dev/null +++ b/checkpoint-9000/rng_state.pth @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8dc5fad2078e90bc1d6afe885dbc9bc4383902dc7dc94d234ba8659b8bc8114a +size 18663 diff --git a/checkpoint-9000/scheduler.pt b/checkpoint-9000/scheduler.pt new file mode 100644 index 0000000000000000000000000000000000000000..a10ef3f1dd6ccb91526e098177eb6152bdd75cb1 --- /dev/null +++ b/checkpoint-9000/scheduler.pt @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:836867f0b5de4091d17ea8ade1d8c5e228e49150980392c5eb93b080404fd9c4 +size 627 diff --git a/checkpoint-9000/trainer_state.json b/checkpoint-9000/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..0bce190d6c09d554f879dc376cbcaf5be0fd4778 --- /dev/null +++ b/checkpoint-9000/trainer_state.json @@ -0,0 +1,5416 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.6370441153049848, + "global_step": 9000, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 2.1906216217214976e+18, + "trial_name": null, + "trial_params": null +} diff --git a/checkpoint-9000/training_args.bin b/checkpoint-9000/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/checkpoint-9000/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/finetuning_args.json b/finetuning_args.json new file mode 100644 index 0000000000000000000000000000000000000000..d01efc206b59c6f88548e8f3940579f2ed2af33b --- /dev/null +++ b/finetuning_args.json @@ -0,0 +1,16 @@ +{ + "dpo_beta": 0.1, + "finetuning_type": "lora", + "lora_alpha": 32.0, + "lora_dropout": 0.1, + "lora_rank": 8, + "lora_target": [ + "q_proj", + "v_proj" + ], + "name_module_trainable": "mlp", + "num_hidden_layers": 32, + "num_layer_trainable": 3, + "ppo_score_norm": false, + "resume_lora_training": true +} diff --git a/train_results.json b/train_results.json new file mode 100644 index 0000000000000000000000000000000000000000..719e3a99ae710467091fa106314e8877f34c3e8d --- /dev/null +++ b/train_results.json @@ -0,0 +1,7 @@ +{ + "epoch": 2.0, + "train_loss": 0.7399377094979515, + "train_runtime": 176487.3633, + "train_samples_per_second": 2.562, + "train_steps_per_second": 0.16 +} \ No newline at end of file diff --git a/trainer_log.jsonl b/trainer_log.jsonl new file mode 100644 index 0000000000000000000000000000000000000000..b7a4687b8f3d13e2286e45a1f41e49180d796f8c --- /dev/null +++ b/trainer_log.jsonl @@ -0,0 +1,2826 @@ +{"current_steps": 10, "total_steps": 28254, "loss": 1.3539, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999998454568244e-05, "epoch": 0.0, "percentage": 0.04, "elapsed_time": "0:01:02", "remaining_time": "2 days, 0:57:23"} +{"current_steps": 20, "total_steps": 28254, "loss": 1.1833, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9999938182748876e-05, "epoch": 0.0, "percentage": 0.07, "elapsed_time": "0:02:02", "remaining_time": "2 days, 0:08:30"} +{"current_steps": 30, "total_steps": 28254, "loss": 1.173, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9999870029288556e-05, "epoch": 0.0, "percentage": 0.11, "elapsed_time": "0:03:02", "remaining_time": "1 day, 23:38:23"} +{"current_steps": 40, "total_steps": 28254, "loss": 1.0772, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999976494017406e-05, "epoch": 0.0, "percentage": 0.14, "elapsed_time": "0:04:04", "remaining_time": "1 day, 23:57:01"} +{"current_steps": 50, "total_steps": 28254, "loss": 1.0715, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999962894271507e-05, "epoch": 0.0, "percentage": 0.18, "elapsed_time": "0:05:06", "remaining_time": "2 days, 0:02:45"} +{"current_steps": 60, "total_steps": 28254, "loss": 1.0268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9999462037079705e-05, "epoch": 0.0, "percentage": 0.21, "elapsed_time": "0:06:08", "remaining_time": "2 days, 0:07:47"} +{"current_steps": 70, "total_steps": 28254, "loss": 0.9807, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999926422347434e-05, "epoch": 0.0, "percentage": 0.25, "elapsed_time": "0:07:09", "remaining_time": "1 day, 23:59:59"} +{"current_steps": 80, "total_steps": 28254, "loss": 0.9862, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999903550214352e-05, "epoch": 0.01, "percentage": 0.28, "elapsed_time": "0:08:10", "remaining_time": "1 day, 23:56:47"} +{"current_steps": 90, "total_steps": 28254, "loss": 0.9725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999877587337004e-05, "epoch": 0.01, "percentage": 0.32, "elapsed_time": "0:09:12", "remaining_time": "1 day, 23:59:05"} +{"current_steps": 100, "total_steps": 28254, "loss": 0.9993, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999848533747488e-05, "epoch": 0.01, "percentage": 0.35, "elapsed_time": "0:10:13", "remaining_time": "1 day, 23:59:57"} +{"current_steps": 110, "total_steps": 28254, "loss": 0.9596, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999816389481725e-05, "epoch": 0.01, "percentage": 0.39, "elapsed_time": "0:11:16", "remaining_time": "2 days, 0:04:12"} +{"current_steps": 120, "total_steps": 28254, "loss": 0.979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999781154579456e-05, "epoch": 0.01, "percentage": 0.42, "elapsed_time": "0:12:19", "remaining_time": "2 days, 0:08:22"} +{"current_steps": 130, "total_steps": 28254, "loss": 0.9748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9997428290842444e-05, "epoch": 0.01, "percentage": 0.46, "elapsed_time": "0:13:20", "remaining_time": "2 days, 0:07:38"} +{"current_steps": 140, "total_steps": 28254, "loss": 0.9309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999701413043471e-05, "epoch": 0.01, "percentage": 0.5, "elapsed_time": "0:14:23", "remaining_time": "2 days, 0:10:52"} +{"current_steps": 150, "total_steps": 28254, "loss": 0.9143, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999656906508344e-05, "epoch": 0.01, "percentage": 0.53, "elapsed_time": "0:15:27", "remaining_time": "2 days, 0:17:03"} +{"current_steps": 160, "total_steps": 28254, "loss": 0.9439, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999609309533887e-05, "epoch": 0.01, "percentage": 0.57, "elapsed_time": "0:16:32", "remaining_time": "2 days, 0:23:56"} +{"current_steps": 170, "total_steps": 28254, "loss": 0.9286, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999558622178947e-05, "epoch": 0.01, "percentage": 0.6, "elapsed_time": "0:17:32", "remaining_time": "2 days, 0:18:19"} +{"current_steps": 180, "total_steps": 28254, "loss": 0.9544, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.99950484450619e-05, "epoch": 0.01, "percentage": 0.64, "elapsed_time": "0:18:34", "remaining_time": "2 days, 0:17:16"} +{"current_steps": 190, "total_steps": 28254, "loss": 0.9355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999447976582104e-05, "epoch": 0.01, "percentage": 0.67, "elapsed_time": "0:19:37", "remaining_time": "2 days, 0:18:45"} +{"current_steps": 200, "total_steps": 28254, "loss": 0.9154, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999388018476998e-05, "epoch": 0.01, "percentage": 0.71, "elapsed_time": "0:20:37", "remaining_time": "2 days, 0:13:33"} +{"current_steps": 210, "total_steps": 28254, "loss": 0.9326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999324970265001e-05, "epoch": 0.01, "percentage": 0.74, "elapsed_time": "0:21:38", "remaining_time": "2 days, 0:10:00"} +{"current_steps": 220, "total_steps": 28254, "loss": 0.9215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.999258832024061e-05, "epoch": 0.02, "percentage": 0.78, "elapsed_time": "0:22:40", "remaining_time": "2 days, 0:09:34"} +{"current_steps": 230, "total_steps": 28254, "loss": 0.9281, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.99918960383595e-05, "epoch": 0.02, "percentage": 0.81, "elapsed_time": "0:23:43", "remaining_time": "2 days, 0:11:36"} +{"current_steps": 240, "total_steps": 28254, "loss": 0.935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9991172857862555e-05, "epoch": 0.02, "percentage": 0.85, "elapsed_time": "0:24:44", "remaining_time": "2 days, 0:08:14"} +{"current_steps": 250, "total_steps": 28254, "loss": 0.941, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.99904187796439e-05, "epoch": 0.02, "percentage": 0.88, "elapsed_time": "0:25:47", "remaining_time": "2 days, 0:08:52"} +{"current_steps": 260, "total_steps": 28254, "loss": 0.9377, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9989633804635814e-05, "epoch": 0.02, "percentage": 0.92, "elapsed_time": "0:26:50", "remaining_time": "2 days, 0:10:19"} +{"current_steps": 270, "total_steps": 28254, "loss": 0.9014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9988817933808814e-05, "epoch": 0.02, "percentage": 0.96, "elapsed_time": "0:27:53", "remaining_time": "2 days, 0:09:58"} +{"current_steps": 280, "total_steps": 28254, "loss": 0.9323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9987971168171585e-05, "epoch": 0.02, "percentage": 0.99, "elapsed_time": "0:28:55", "remaining_time": "2 days, 0:09:54"} +{"current_steps": 290, "total_steps": 28254, "loss": 0.8987, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.998709350877103e-05, "epoch": 0.02, "percentage": 1.03, "elapsed_time": "0:29:59", "remaining_time": "2 days, 0:11:25"} +{"current_steps": 300, "total_steps": 28254, "loss": 0.8933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.998618495669224e-05, "epoch": 0.02, "percentage": 1.06, "elapsed_time": "0:31:02", "remaining_time": "2 days, 0:12:59"} +{"current_steps": 310, "total_steps": 28254, "loss": 0.893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9985245513058495e-05, "epoch": 0.02, "percentage": 1.1, "elapsed_time": "0:32:06", "remaining_time": "2 days, 0:14:05"} +{"current_steps": 320, "total_steps": 28254, "loss": 0.909, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9984275179031276e-05, "epoch": 0.02, "percentage": 1.13, "elapsed_time": "0:33:08", "remaining_time": "2 days, 0:13:04"} +{"current_steps": 330, "total_steps": 28254, "loss": 0.9235, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.998327395581025e-05, "epoch": 0.02, "percentage": 1.17, "elapsed_time": "0:34:11", "remaining_time": "2 days, 0:13:46"} +{"current_steps": 340, "total_steps": 28254, "loss": 0.8945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9982241844633265e-05, "epoch": 0.02, "percentage": 1.2, "elapsed_time": "0:35:14", "remaining_time": "2 days, 0:13:02"} +{"current_steps": 350, "total_steps": 28254, "loss": 0.9095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.998117884677638e-05, "epoch": 0.02, "percentage": 1.24, "elapsed_time": "0:36:13", "remaining_time": "2 days, 0:08:13"} +{"current_steps": 360, "total_steps": 28254, "loss": 0.8919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.998008496355382e-05, "epoch": 0.03, "percentage": 1.27, "elapsed_time": "0:37:15", "remaining_time": "2 days, 0:06:38"} +{"current_steps": 370, "total_steps": 28254, "loss": 0.9088, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9978960196318006e-05, "epoch": 0.03, "percentage": 1.31, "elapsed_time": "0:38:16", "remaining_time": "2 days, 0:04:20"} +{"current_steps": 380, "total_steps": 28254, "loss": 0.8985, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.997780454645954e-05, "epoch": 0.03, "percentage": 1.34, "elapsed_time": "0:39:19", "remaining_time": "2 days, 0:04:11"} +{"current_steps": 390, "total_steps": 28254, "loss": 0.8972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.99766180154072e-05, "epoch": 0.03, "percentage": 1.38, "elapsed_time": "0:40:20", "remaining_time": "2 days, 0:01:52"} +{"current_steps": 400, "total_steps": 28254, "loss": 0.8983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9975400604627957e-05, "epoch": 0.03, "percentage": 1.42, "elapsed_time": "0:41:24", "remaining_time": "2 days, 0:03:21"} +{"current_steps": 410, "total_steps": 28254, "loss": 0.9115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9974152315626935e-05, "epoch": 0.03, "percentage": 1.45, "elapsed_time": "0:42:24", "remaining_time": "2 days, 0:00:02"} +{"current_steps": 420, "total_steps": 28254, "loss": 0.8957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.997287314994746e-05, "epoch": 0.03, "percentage": 1.49, "elapsed_time": "0:43:28", "remaining_time": "2 days, 0:00:45"} +{"current_steps": 430, "total_steps": 28254, "loss": 0.8681, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.997156310917103e-05, "epoch": 0.03, "percentage": 1.52, "elapsed_time": "0:44:31", "remaining_time": "2 days, 0:01:07"} +{"current_steps": 440, "total_steps": 28254, "loss": 0.894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9970222194917296e-05, "epoch": 0.03, "percentage": 1.56, "elapsed_time": "0:45:34", "remaining_time": "2 days, 0:00:28"} +{"current_steps": 450, "total_steps": 28254, "loss": 0.8798, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.996885040884409e-05, "epoch": 0.03, "percentage": 1.59, "elapsed_time": "0:46:37", "remaining_time": "2 days, 0:01:03"} +{"current_steps": 460, "total_steps": 28254, "loss": 0.9034, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.996744775264743e-05, "epoch": 0.03, "percentage": 1.63, "elapsed_time": "0:47:37", "remaining_time": "1 day, 23:57:37"} +{"current_steps": 470, "total_steps": 28254, "loss": 0.9033, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.996601422806147e-05, "epoch": 0.03, "percentage": 1.66, "elapsed_time": "0:48:39", "remaining_time": "1 day, 23:56:18"} +{"current_steps": 480, "total_steps": 28254, "loss": 0.8841, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9964549836858536e-05, "epoch": 0.03, "percentage": 1.7, "elapsed_time": "0:49:41", "remaining_time": "1 day, 23:54:57"} +{"current_steps": 490, "total_steps": 28254, "loss": 0.8877, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9963054580849134e-05, "epoch": 0.03, "percentage": 1.73, "elapsed_time": "0:50:42", "remaining_time": "1 day, 23:53:33"} +{"current_steps": 500, "total_steps": 28254, "loss": 0.8729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.996152846188191e-05, "epoch": 0.04, "percentage": 1.77, "elapsed_time": "0:51:47", "remaining_time": "1 day, 23:54:44"} +{"current_steps": 510, "total_steps": 28254, "loss": 0.8853, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.995997148184369e-05, "epoch": 0.04, "percentage": 1.81, "elapsed_time": "0:52:50", "remaining_time": "1 day, 23:54:40"} +{"current_steps": 520, "total_steps": 28254, "loss": 0.8837, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9958383642659414e-05, "epoch": 0.04, "percentage": 1.84, "elapsed_time": "0:53:52", "remaining_time": "1 day, 23:53:26"} +{"current_steps": 530, "total_steps": 28254, "loss": 0.8833, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.995676494629221e-05, "epoch": 0.04, "percentage": 1.88, "elapsed_time": "0:54:55", "remaining_time": "1 day, 23:52:50"} +{"current_steps": 540, "total_steps": 28254, "loss": 0.8843, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9955115394743354e-05, "epoch": 0.04, "percentage": 1.91, "elapsed_time": "0:55:57", "remaining_time": "1 day, 23:51:38"} +{"current_steps": 550, "total_steps": 28254, "loss": 0.892, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.995343499005225e-05, "epoch": 0.04, "percentage": 1.95, "elapsed_time": "0:56:59", "remaining_time": "1 day, 23:50:20"} +{"current_steps": 560, "total_steps": 28254, "loss": 0.8575, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.995172373429646e-05, "epoch": 0.04, "percentage": 1.98, "elapsed_time": "0:58:01", "remaining_time": "1 day, 23:49:28"} +{"current_steps": 570, "total_steps": 28254, "loss": 0.8311, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9949981629591705e-05, "epoch": 0.04, "percentage": 2.02, "elapsed_time": "0:59:02", "remaining_time": "1 day, 23:47:37"} +{"current_steps": 580, "total_steps": 28254, "loss": 0.8669, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.99482086780918e-05, "epoch": 0.04, "percentage": 2.05, "elapsed_time": "1:00:06", "remaining_time": "1 day, 23:47:56"} +{"current_steps": 590, "total_steps": 28254, "loss": 0.8388, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.994640488198874e-05, "epoch": 0.04, "percentage": 2.09, "elapsed_time": "1:01:09", "remaining_time": "1 day, 23:47:21"} +{"current_steps": 600, "total_steps": 28254, "loss": 0.8424, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.994457024351264e-05, "epoch": 0.04, "percentage": 2.12, "elapsed_time": "1:02:10", "remaining_time": "1 day, 23:45:27"} +{"current_steps": 610, "total_steps": 28254, "loss": 0.8676, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.994270476493175e-05, "epoch": 0.04, "percentage": 2.16, "elapsed_time": "1:03:13", "remaining_time": "1 day, 23:45:12"} +{"current_steps": 620, "total_steps": 28254, "loss": 0.8598, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.994080844855243e-05, "epoch": 0.04, "percentage": 2.19, "elapsed_time": "1:04:14", "remaining_time": "1 day, 23:43:24"} +{"current_steps": 630, "total_steps": 28254, "loss": 0.824, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.993888129671921e-05, "epoch": 0.04, "percentage": 2.23, "elapsed_time": "1:05:15", "remaining_time": "1 day, 23:41:05"} +{"current_steps": 640, "total_steps": 28254, "loss": 0.8652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.993692331181469e-05, "epoch": 0.05, "percentage": 2.27, "elapsed_time": "1:06:16", "remaining_time": "1 day, 23:39:48"} +{"current_steps": 650, "total_steps": 28254, "loss": 0.8533, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.993493449625963e-05, "epoch": 0.05, "percentage": 2.3, "elapsed_time": "1:07:20", "remaining_time": "1 day, 23:39:30"} +{"current_steps": 660, "total_steps": 28254, "loss": 0.8677, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.993291485251288e-05, "epoch": 0.05, "percentage": 2.34, "elapsed_time": "1:08:22", "remaining_time": "1 day, 23:38:25"} +{"current_steps": 670, "total_steps": 28254, "loss": 0.8459, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.993086438307143e-05, "epoch": 0.05, "percentage": 2.37, "elapsed_time": "1:09:24", "remaining_time": "1 day, 23:37:25"} +{"current_steps": 680, "total_steps": 28254, "loss": 0.8626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9928783090470365e-05, "epoch": 0.05, "percentage": 2.41, "elapsed_time": "1:10:25", "remaining_time": "1 day, 23:35:24"} +{"current_steps": 690, "total_steps": 28254, "loss": 0.8127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.992667097728287e-05, "epoch": 0.05, "percentage": 2.44, "elapsed_time": "1:11:26", "remaining_time": "1 day, 23:34:02"} +{"current_steps": 700, "total_steps": 28254, "loss": 0.8716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.992452804612027e-05, "epoch": 0.05, "percentage": 2.48, "elapsed_time": "1:12:28", "remaining_time": "1 day, 23:32:37"} +{"current_steps": 710, "total_steps": 28254, "loss": 0.8544, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.992235429963195e-05, "epoch": 0.05, "percentage": 2.51, "elapsed_time": "1:13:30", "remaining_time": "1 day, 23:31:38"} +{"current_steps": 720, "total_steps": 28254, "loss": 0.8562, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.992014974050542e-05, "epoch": 0.05, "percentage": 2.55, "elapsed_time": "1:14:34", "remaining_time": "1 day, 23:31:44"} +{"current_steps": 730, "total_steps": 28254, "loss": 0.871, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.991791437146627e-05, "epoch": 0.05, "percentage": 2.58, "elapsed_time": "1:15:35", "remaining_time": "1 day, 23:30:23"} +{"current_steps": 740, "total_steps": 28254, "loss": 0.8453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9915648195278186e-05, "epoch": 0.05, "percentage": 2.62, "elapsed_time": "1:16:39", "remaining_time": "1 day, 23:30:11"} +{"current_steps": 750, "total_steps": 28254, "loss": 0.8524, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9913351214742945e-05, "epoch": 0.05, "percentage": 2.65, "elapsed_time": "1:17:42", "remaining_time": "1 day, 23:29:42"} +{"current_steps": 760, "total_steps": 28254, "loss": 0.8581, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.991102343270042e-05, "epoch": 0.05, "percentage": 2.69, "elapsed_time": "1:18:46", "remaining_time": "1 day, 23:30:02"} +{"current_steps": 770, "total_steps": 28254, "loss": 0.8477, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9908664852028545e-05, "epoch": 0.05, "percentage": 2.73, "elapsed_time": "1:19:50", "remaining_time": "1 day, 23:29:46"} +{"current_steps": 780, "total_steps": 28254, "loss": 0.8651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.990627547564335e-05, "epoch": 0.06, "percentage": 2.76, "elapsed_time": "1:20:54", "remaining_time": "1 day, 23:29:56"} +{"current_steps": 790, "total_steps": 28254, "loss": 0.8453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.990385530649891e-05, "epoch": 0.06, "percentage": 2.8, "elapsed_time": "1:21:58", "remaining_time": "1 day, 23:29:43"} +{"current_steps": 800, "total_steps": 28254, "loss": 0.8586, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9901404347587404e-05, "epoch": 0.06, "percentage": 2.83, "elapsed_time": "1:22:59", "remaining_time": "1 day, 23:28:12"} +{"current_steps": 810, "total_steps": 28254, "loss": 0.8746, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9898922601939056e-05, "epoch": 0.06, "percentage": 2.87, "elapsed_time": "1:24:03", "remaining_time": "1 day, 23:28:11"} +{"current_steps": 820, "total_steps": 28254, "loss": 0.8652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.989641007262218e-05, "epoch": 0.06, "percentage": 2.9, "elapsed_time": "1:25:04", "remaining_time": "1 day, 23:26:11"} +{"current_steps": 830, "total_steps": 28254, "loss": 0.8531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.98938667627431e-05, "epoch": 0.06, "percentage": 2.94, "elapsed_time": "1:26:08", "remaining_time": "1 day, 23:26:07"} +{"current_steps": 840, "total_steps": 28254, "loss": 0.8686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.989129267544626e-05, "epoch": 0.06, "percentage": 2.97, "elapsed_time": "1:27:10", "remaining_time": "1 day, 23:25:13"} +{"current_steps": 850, "total_steps": 28254, "loss": 0.8692, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.988868781391408e-05, "epoch": 0.06, "percentage": 3.01, "elapsed_time": "1:28:12", "remaining_time": "1 day, 23:23:40"} +{"current_steps": 860, "total_steps": 28254, "loss": 0.8274, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.988605218136711e-05, "epoch": 0.06, "percentage": 3.04, "elapsed_time": "1:29:15", "remaining_time": "1 day, 23:23:21"} +{"current_steps": 870, "total_steps": 28254, "loss": 0.8502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9883385781063876e-05, "epoch": 0.06, "percentage": 3.08, "elapsed_time": "1:30:18", "remaining_time": "1 day, 23:22:38"} +{"current_steps": 880, "total_steps": 28254, "loss": 0.8445, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9880688616300975e-05, "epoch": 0.06, "percentage": 3.11, "elapsed_time": "1:31:19", "remaining_time": "1 day, 23:20:41"} +{"current_steps": 890, "total_steps": 28254, "loss": 0.8475, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9877960690413035e-05, "epoch": 0.06, "percentage": 3.15, "elapsed_time": "1:32:21", "remaining_time": "1 day, 23:19:34"} +{"current_steps": 900, "total_steps": 28254, "loss": 0.8215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.987520200677271e-05, "epoch": 0.06, "percentage": 3.19, "elapsed_time": "1:33:23", "remaining_time": "1 day, 23:18:18"} +{"current_steps": 910, "total_steps": 28254, "loss": 0.8389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.987241256879071e-05, "epoch": 0.06, "percentage": 3.22, "elapsed_time": "1:34:24", "remaining_time": "1 day, 23:16:47"} +{"current_steps": 920, "total_steps": 28254, "loss": 0.8422, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.986959237991571e-05, "epoch": 0.07, "percentage": 3.26, "elapsed_time": "1:35:26", "remaining_time": "1 day, 23:15:48"} +{"current_steps": 930, "total_steps": 28254, "loss": 0.8287, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9866741443634455e-05, "epoch": 0.07, "percentage": 3.29, "elapsed_time": "1:36:28", "remaining_time": "1 day, 23:14:30"} +{"current_steps": 940, "total_steps": 28254, "loss": 0.8694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.986385976347169e-05, "epoch": 0.07, "percentage": 3.33, "elapsed_time": "1:37:29", "remaining_time": "1 day, 23:12:57"} +{"current_steps": 950, "total_steps": 28254, "loss": 0.847, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.986094734299016e-05, "epoch": 0.07, "percentage": 3.36, "elapsed_time": "1:38:33", "remaining_time": "1 day, 23:12:31"} +{"current_steps": 960, "total_steps": 28254, "loss": 0.8191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.985800418579063e-05, "epoch": 0.07, "percentage": 3.4, "elapsed_time": "1:39:33", "remaining_time": "1 day, 23:10:29"} +{"current_steps": 970, "total_steps": 28254, "loss": 0.8419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.985503029551184e-05, "epoch": 0.07, "percentage": 3.43, "elapsed_time": "1:40:36", "remaining_time": "1 day, 23:10:03"} +{"current_steps": 980, "total_steps": 28254, "loss": 0.8517, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.985202567583057e-05, "epoch": 0.07, "percentage": 3.47, "elapsed_time": "1:41:40", "remaining_time": "1 day, 23:09:44"} +{"current_steps": 990, "total_steps": 28254, "loss": 0.8653, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.984899033046155e-05, "epoch": 0.07, "percentage": 3.5, "elapsed_time": "1:42:42", "remaining_time": "1 day, 23:08:18"} +{"current_steps": 1000, "total_steps": 28254, "loss": 0.8349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9845924263157526e-05, "epoch": 0.07, "percentage": 3.54, "elapsed_time": "1:43:44", "remaining_time": "1 day, 23:07:09"} +{"current_steps": 1010, "total_steps": 28254, "loss": 0.8536, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.984282747770922e-05, "epoch": 0.07, "percentage": 3.57, "elapsed_time": "1:44:48", "remaining_time": "1 day, 23:06:54"} +{"current_steps": 1020, "total_steps": 28254, "loss": 0.8882, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.983969997794531e-05, "epoch": 0.07, "percentage": 3.61, "elapsed_time": "1:45:49", "remaining_time": "1 day, 23:05:41"} +{"current_steps": 1030, "total_steps": 28254, "loss": 0.8285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.983654176773248e-05, "epoch": 0.07, "percentage": 3.65, "elapsed_time": "1:46:51", "remaining_time": "1 day, 23:04:30"} +{"current_steps": 1040, "total_steps": 28254, "loss": 0.8503, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.983335285097537e-05, "epoch": 0.07, "percentage": 3.68, "elapsed_time": "1:47:53", "remaining_time": "1 day, 23:03:05"} +{"current_steps": 1050, "total_steps": 28254, "loss": 0.8171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.983013323161657e-05, "epoch": 0.07, "percentage": 3.72, "elapsed_time": "1:48:57", "remaining_time": "1 day, 23:02:52"} +{"current_steps": 1060, "total_steps": 28254, "loss": 0.8398, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.982688291363666e-05, "epoch": 0.08, "percentage": 3.75, "elapsed_time": "1:50:00", "remaining_time": "1 day, 23:02:08"} +{"current_steps": 1070, "total_steps": 28254, "loss": 0.8222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.982360190105414e-05, "epoch": 0.08, "percentage": 3.79, "elapsed_time": "1:51:02", "remaining_time": "1 day, 23:01:03"} +{"current_steps": 1080, "total_steps": 28254, "loss": 0.8333, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.982029019792548e-05, "epoch": 0.08, "percentage": 3.82, "elapsed_time": "1:52:04", "remaining_time": "1 day, 23:00:05"} +{"current_steps": 1090, "total_steps": 28254, "loss": 0.8437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.981694780834508e-05, "epoch": 0.08, "percentage": 3.86, "elapsed_time": "1:53:05", "remaining_time": "1 day, 22:58:18"} +{"current_steps": 1100, "total_steps": 28254, "loss": 0.827, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.981357473644531e-05, "epoch": 0.08, "percentage": 3.89, "elapsed_time": "1:54:05", "remaining_time": "1 day, 22:56:34"} +{"current_steps": 1110, "total_steps": 28254, "loss": 0.8216, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9810170986396434e-05, "epoch": 0.08, "percentage": 3.93, "elapsed_time": "1:55:08", "remaining_time": "1 day, 22:55:34"} +{"current_steps": 1120, "total_steps": 28254, "loss": 0.8253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.980673656240667e-05, "epoch": 0.08, "percentage": 3.96, "elapsed_time": "1:56:06", "remaining_time": "1 day, 22:53:04"} +{"current_steps": 1130, "total_steps": 28254, "loss": 0.8195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9803271468722146e-05, "epoch": 0.08, "percentage": 4.0, "elapsed_time": "1:57:09", "remaining_time": "1 day, 22:52:10"} +{"current_steps": 1140, "total_steps": 28254, "loss": 0.8394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9799775709626926e-05, "epoch": 0.08, "percentage": 4.03, "elapsed_time": "1:58:11", "remaining_time": "1 day, 22:51:05"} +{"current_steps": 1150, "total_steps": 28254, "loss": 0.8348, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9796249289442966e-05, "epoch": 0.08, "percentage": 4.07, "elapsed_time": "1:59:13", "remaining_time": "1 day, 22:50:04"} +{"current_steps": 1160, "total_steps": 28254, "loss": 0.859, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9792692212530134e-05, "epoch": 0.08, "percentage": 4.11, "elapsed_time": "2:00:16", "remaining_time": "1 day, 22:49:06"} +{"current_steps": 1170, "total_steps": 28254, "loss": 0.8043, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.978910448328622e-05, "epoch": 0.08, "percentage": 4.14, "elapsed_time": "2:01:21", "remaining_time": "1 day, 22:49:15"} +{"current_steps": 1180, "total_steps": 28254, "loss": 0.8433, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.97854861061469e-05, "epoch": 0.08, "percentage": 4.18, "elapsed_time": "2:02:22", "remaining_time": "1 day, 22:47:41"} +{"current_steps": 1190, "total_steps": 28254, "loss": 0.8244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.978183708558571e-05, "epoch": 0.08, "percentage": 4.21, "elapsed_time": "2:03:27", "remaining_time": "1 day, 22:47:46"} +{"current_steps": 1200, "total_steps": 28254, "loss": 0.8379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.977815742611413e-05, "epoch": 0.08, "percentage": 4.25, "elapsed_time": "2:04:28", "remaining_time": "1 day, 22:46:07"} +{"current_steps": 1210, "total_steps": 28254, "loss": 0.8471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.977444713228147e-05, "epoch": 0.09, "percentage": 4.28, "elapsed_time": "2:05:29", "remaining_time": "1 day, 22:44:54"} +{"current_steps": 1220, "total_steps": 28254, "loss": 0.808, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9770706208674946e-05, "epoch": 0.09, "percentage": 4.32, "elapsed_time": "2:06:32", "remaining_time": "1 day, 22:44:09"} +{"current_steps": 1230, "total_steps": 28254, "loss": 0.8384, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.976693465991963e-05, "epoch": 0.09, "percentage": 4.35, "elapsed_time": "2:07:34", "remaining_time": "1 day, 22:42:58"} +{"current_steps": 1240, "total_steps": 28254, "loss": 0.856, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9763132490678453e-05, "epoch": 0.09, "percentage": 4.39, "elapsed_time": "2:08:35", "remaining_time": "1 day, 22:41:27"} +{"current_steps": 1250, "total_steps": 28254, "loss": 0.8382, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.975929970565222e-05, "epoch": 0.09, "percentage": 4.42, "elapsed_time": "2:09:36", "remaining_time": "1 day, 22:39:47"} +{"current_steps": 1260, "total_steps": 28254, "loss": 0.8219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.975543630957957e-05, "epoch": 0.09, "percentage": 4.46, "elapsed_time": "2:10:38", "remaining_time": "1 day, 22:38:55"} +{"current_steps": 1270, "total_steps": 28254, "loss": 0.8384, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.975154230723699e-05, "epoch": 0.09, "percentage": 4.49, "elapsed_time": "2:11:39", "remaining_time": "1 day, 22:37:32"} +{"current_steps": 1280, "total_steps": 28254, "loss": 0.8276, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9747617703438824e-05, "epoch": 0.09, "percentage": 4.53, "elapsed_time": "2:12:42", "remaining_time": "1 day, 22:36:40"} +{"current_steps": 1290, "total_steps": 28254, "loss": 0.8604, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.974366250303723e-05, "epoch": 0.09, "percentage": 4.57, "elapsed_time": "2:13:46", "remaining_time": "1 day, 22:36:09"} +{"current_steps": 1300, "total_steps": 28254, "loss": 0.8471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.97396767109222e-05, "epoch": 0.09, "percentage": 4.6, "elapsed_time": "2:14:48", "remaining_time": "1 day, 22:35:06"} +{"current_steps": 1310, "total_steps": 28254, "loss": 0.8199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.973566033202156e-05, "epoch": 0.09, "percentage": 4.64, "elapsed_time": "2:15:51", "remaining_time": "1 day, 22:34:10"} +{"current_steps": 1320, "total_steps": 28254, "loss": 0.8243, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.973161337130094e-05, "epoch": 0.09, "percentage": 4.67, "elapsed_time": "2:16:53", "remaining_time": "1 day, 22:33:20"} +{"current_steps": 1330, "total_steps": 28254, "loss": 0.7936, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.972753583376376e-05, "epoch": 0.09, "percentage": 4.71, "elapsed_time": "2:17:54", "remaining_time": "1 day, 22:31:39"} +{"current_steps": 1340, "total_steps": 28254, "loss": 0.8231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.972342772445129e-05, "epoch": 0.09, "percentage": 4.74, "elapsed_time": "2:18:56", "remaining_time": "1 day, 22:30:38"} +{"current_steps": 1350, "total_steps": 28254, "loss": 0.8223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9719289048442566e-05, "epoch": 0.1, "percentage": 4.78, "elapsed_time": "2:19:56", "remaining_time": "1 day, 22:28:58"} +{"current_steps": 1360, "total_steps": 28254, "loss": 0.8174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.971511981085441e-05, "epoch": 0.1, "percentage": 4.81, "elapsed_time": "2:20:58", "remaining_time": "1 day, 22:27:54"} +{"current_steps": 1370, "total_steps": 28254, "loss": 0.8088, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9710920016841455e-05, "epoch": 0.1, "percentage": 4.85, "elapsed_time": "2:21:58", "remaining_time": "1 day, 22:26:11"} +{"current_steps": 1380, "total_steps": 28254, "loss": 0.8149, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9706689671596086e-05, "epoch": 0.1, "percentage": 4.88, "elapsed_time": "2:22:58", "remaining_time": "1 day, 22:24:24"} +{"current_steps": 1390, "total_steps": 28254, "loss": 0.8522, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.970242878034847e-05, "epoch": 0.1, "percentage": 4.92, "elapsed_time": "2:24:01", "remaining_time": "1 day, 22:23:31"} +{"current_steps": 1400, "total_steps": 28254, "loss": 0.8404, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.969813734836656e-05, "epoch": 0.1, "percentage": 4.96, "elapsed_time": "2:25:03", "remaining_time": "1 day, 22:22:17"} +{"current_steps": 1410, "total_steps": 28254, "loss": 0.8608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.969381538095602e-05, "epoch": 0.1, "percentage": 4.99, "elapsed_time": "2:26:04", "remaining_time": "1 day, 22:21:01"} +{"current_steps": 1420, "total_steps": 28254, "loss": 0.8232, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.968946288346031e-05, "epoch": 0.1, "percentage": 5.03, "elapsed_time": "2:27:05", "remaining_time": "1 day, 22:19:46"} +{"current_steps": 1430, "total_steps": 28254, "loss": 0.8368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.968507986126063e-05, "epoch": 0.1, "percentage": 5.06, "elapsed_time": "2:28:08", "remaining_time": "1 day, 22:18:53"} +{"current_steps": 1440, "total_steps": 28254, "loss": 0.8154, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9680666319775884e-05, "epoch": 0.1, "percentage": 5.1, "elapsed_time": "2:29:13", "remaining_time": "1 day, 22:18:32"} +{"current_steps": 1450, "total_steps": 28254, "loss": 0.8379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.967622226446276e-05, "epoch": 0.1, "percentage": 5.13, "elapsed_time": "2:30:14", "remaining_time": "1 day, 22:17:22"} +{"current_steps": 1460, "total_steps": 28254, "loss": 0.8333, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9671747700815615e-05, "epoch": 0.1, "percentage": 5.17, "elapsed_time": "2:31:16", "remaining_time": "1 day, 22:16:10"} +{"current_steps": 1470, "total_steps": 28254, "loss": 0.8542, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.966724263436658e-05, "epoch": 0.1, "percentage": 5.2, "elapsed_time": "2:32:17", "remaining_time": "1 day, 22:14:43"} +{"current_steps": 1480, "total_steps": 28254, "loss": 0.8421, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9662707070685476e-05, "epoch": 0.1, "percentage": 5.24, "elapsed_time": "2:33:20", "remaining_time": "1 day, 22:13:55"} +{"current_steps": 1490, "total_steps": 28254, "loss": 0.7827, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9658141015379805e-05, "epoch": 0.11, "percentage": 5.27, "elapsed_time": "2:34:22", "remaining_time": "1 day, 22:12:50"} +{"current_steps": 1500, "total_steps": 28254, "loss": 0.8659, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9653544474094805e-05, "epoch": 0.11, "percentage": 5.31, "elapsed_time": "2:35:27", "remaining_time": "1 day, 22:12:38"} +{"current_steps": 1510, "total_steps": 28254, "loss": 0.8166, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9648917452513384e-05, "epoch": 0.11, "percentage": 5.34, "elapsed_time": "2:36:28", "remaining_time": "1 day, 22:11:17"} +{"current_steps": 1520, "total_steps": 28254, "loss": 0.8221, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.964425995635613e-05, "epoch": 0.11, "percentage": 5.38, "elapsed_time": "2:37:30", "remaining_time": "1 day, 22:10:20"} +{"current_steps": 1530, "total_steps": 28254, "loss": 0.8129, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.963957199138134e-05, "epoch": 0.11, "percentage": 5.42, "elapsed_time": "2:38:32", "remaining_time": "1 day, 22:09:06"} +{"current_steps": 1540, "total_steps": 28254, "loss": 0.8171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.963485356338493e-05, "epoch": 0.11, "percentage": 5.45, "elapsed_time": "2:39:36", "remaining_time": "1 day, 22:08:34"} +{"current_steps": 1550, "total_steps": 28254, "loss": 0.7984, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9630104678200526e-05, "epoch": 0.11, "percentage": 5.49, "elapsed_time": "2:40:37", "remaining_time": "1 day, 22:07:13"} +{"current_steps": 1560, "total_steps": 28254, "loss": 0.8109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.962532534169939e-05, "epoch": 0.11, "percentage": 5.52, "elapsed_time": "2:41:40", "remaining_time": "1 day, 22:06:38"} +{"current_steps": 1570, "total_steps": 28254, "loss": 0.8164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.962051555979042e-05, "epoch": 0.11, "percentage": 5.56, "elapsed_time": "2:42:42", "remaining_time": "1 day, 22:05:23"} +{"current_steps": 1580, "total_steps": 28254, "loss": 0.8063, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9615675338420174e-05, "epoch": 0.11, "percentage": 5.59, "elapsed_time": "2:43:44", "remaining_time": "1 day, 22:04:27"} +{"current_steps": 1590, "total_steps": 28254, "loss": 0.8123, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.961080468357284e-05, "epoch": 0.11, "percentage": 5.63, "elapsed_time": "2:44:47", "remaining_time": "1 day, 22:03:27"} +{"current_steps": 1600, "total_steps": 28254, "loss": 0.8322, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9605903601270234e-05, "epoch": 0.11, "percentage": 5.66, "elapsed_time": "2:45:47", "remaining_time": "1 day, 22:01:59"} +{"current_steps": 1610, "total_steps": 28254, "loss": 0.8256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.960097209757178e-05, "epoch": 0.11, "percentage": 5.7, "elapsed_time": "2:46:54", "remaining_time": "1 day, 22:02:04"} +{"current_steps": 1620, "total_steps": 28254, "loss": 0.8113, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.959601017857451e-05, "epoch": 0.11, "percentage": 5.73, "elapsed_time": "2:47:56", "remaining_time": "1 day, 22:00:58"} +{"current_steps": 1630, "total_steps": 28254, "loss": 0.8323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.959101785041309e-05, "epoch": 0.12, "percentage": 5.77, "elapsed_time": "2:48:58", "remaining_time": "1 day, 22:00:02"} +{"current_steps": 1640, "total_steps": 28254, "loss": 0.7911, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.958599511925975e-05, "epoch": 0.12, "percentage": 5.8, "elapsed_time": "2:50:02", "remaining_time": "1 day, 21:59:18"} +{"current_steps": 1650, "total_steps": 28254, "loss": 0.8175, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.958094199132432e-05, "epoch": 0.12, "percentage": 5.84, "elapsed_time": "2:51:02", "remaining_time": "1 day, 21:57:54"} +{"current_steps": 1660, "total_steps": 28254, "loss": 0.8114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.957585847285422e-05, "epoch": 0.12, "percentage": 5.88, "elapsed_time": "2:52:07", "remaining_time": "1 day, 21:57:34"} +{"current_steps": 1670, "total_steps": 28254, "loss": 0.7619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.957074457013442e-05, "epoch": 0.12, "percentage": 5.91, "elapsed_time": "2:53:11", "remaining_time": "1 day, 21:56:52"} +{"current_steps": 1680, "total_steps": 28254, "loss": 0.7909, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.956560028948749e-05, "epoch": 0.12, "percentage": 5.95, "elapsed_time": "2:54:15", "remaining_time": "1 day, 21:56:21"} +{"current_steps": 1690, "total_steps": 28254, "loss": 0.8274, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.956042563727352e-05, "epoch": 0.12, "percentage": 5.98, "elapsed_time": "2:55:17", "remaining_time": "1 day, 21:55:19"} +{"current_steps": 1700, "total_steps": 28254, "loss": 0.8251, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.955522061989018e-05, "epoch": 0.12, "percentage": 6.02, "elapsed_time": "2:56:17", "remaining_time": "1 day, 21:53:37"} +{"current_steps": 1710, "total_steps": 28254, "loss": 0.8129, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9549985243772664e-05, "epoch": 0.12, "percentage": 6.05, "elapsed_time": "2:57:19", "remaining_time": "1 day, 21:52:39"} +{"current_steps": 1720, "total_steps": 28254, "loss": 0.8211, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.95447195153937e-05, "epoch": 0.12, "percentage": 6.09, "elapsed_time": "2:58:21", "remaining_time": "1 day, 21:51:34"} +{"current_steps": 1730, "total_steps": 28254, "loss": 0.8131, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9539423441263554e-05, "epoch": 0.12, "percentage": 6.12, "elapsed_time": "2:59:25", "remaining_time": "1 day, 21:50:51"} +{"current_steps": 1740, "total_steps": 28254, "loss": 0.7954, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9534097027930006e-05, "epoch": 0.12, "percentage": 6.16, "elapsed_time": "3:00:26", "remaining_time": "1 day, 21:49:32"} +{"current_steps": 1750, "total_steps": 28254, "loss": 0.829, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.952874028197833e-05, "epoch": 0.12, "percentage": 6.19, "elapsed_time": "3:01:30", "remaining_time": "1 day, 21:48:53"} +{"current_steps": 1760, "total_steps": 28254, "loss": 0.8021, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9523353210031325e-05, "epoch": 0.12, "percentage": 6.23, "elapsed_time": "3:02:34", "remaining_time": "1 day, 21:48:16"} +{"current_steps": 1770, "total_steps": 28254, "loss": 0.8026, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9517935818749275e-05, "epoch": 0.13, "percentage": 6.26, "elapsed_time": "3:03:35", "remaining_time": "1 day, 21:47:04"} +{"current_steps": 1780, "total_steps": 28254, "loss": 0.8616, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.951248811482993e-05, "epoch": 0.13, "percentage": 6.3, "elapsed_time": "3:04:38", "remaining_time": "1 day, 21:46:05"} +{"current_steps": 1790, "total_steps": 28254, "loss": 0.8444, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.950701010500856e-05, "epoch": 0.13, "percentage": 6.34, "elapsed_time": "3:05:39", "remaining_time": "1 day, 21:44:47"} +{"current_steps": 1800, "total_steps": 28254, "loss": 0.8206, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.950150179605785e-05, "epoch": 0.13, "percentage": 6.37, "elapsed_time": "3:06:41", "remaining_time": "1 day, 21:43:50"} +{"current_steps": 1810, "total_steps": 28254, "loss": 0.7956, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9495963194787986e-05, "epoch": 0.13, "percentage": 6.41, "elapsed_time": "3:07:44", "remaining_time": "1 day, 21:42:51"} +{"current_steps": 1820, "total_steps": 28254, "loss": 0.7983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.94903943080466e-05, "epoch": 0.13, "percentage": 6.44, "elapsed_time": "3:08:46", "remaining_time": "1 day, 21:41:41"} +{"current_steps": 1830, "total_steps": 28254, "loss": 0.8392, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.948479514271874e-05, "epoch": 0.13, "percentage": 6.48, "elapsed_time": "3:09:47", "remaining_time": "1 day, 21:40:26"} +{"current_steps": 1840, "total_steps": 28254, "loss": 0.8538, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.947916570572693e-05, "epoch": 0.13, "percentage": 6.51, "elapsed_time": "3:10:49", "remaining_time": "1 day, 21:39:16"} +{"current_steps": 1850, "total_steps": 28254, "loss": 0.7881, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.947350600403108e-05, "epoch": 0.13, "percentage": 6.55, "elapsed_time": "3:11:51", "remaining_time": "1 day, 21:38:16"} +{"current_steps": 1860, "total_steps": 28254, "loss": 0.8101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.946781604462854e-05, "epoch": 0.13, "percentage": 6.58, "elapsed_time": "3:12:53", "remaining_time": "1 day, 21:37:17"} +{"current_steps": 1870, "total_steps": 28254, "loss": 0.8344, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.946209583455407e-05, "epoch": 0.13, "percentage": 6.62, "elapsed_time": "3:13:54", "remaining_time": "1 day, 21:35:54"} +{"current_steps": 1880, "total_steps": 28254, "loss": 0.8239, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.945634538087983e-05, "epoch": 0.13, "percentage": 6.65, "elapsed_time": "3:14:58", "remaining_time": "1 day, 21:35:13"} +{"current_steps": 1890, "total_steps": 28254, "loss": 0.8351, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.945056469071536e-05, "epoch": 0.13, "percentage": 6.69, "elapsed_time": "3:16:00", "remaining_time": "1 day, 21:34:08"} +{"current_steps": 1900, "total_steps": 28254, "loss": 0.7967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.94447537712076e-05, "epoch": 0.13, "percentage": 6.72, "elapsed_time": "3:17:02", "remaining_time": "1 day, 21:33:06"} +{"current_steps": 1910, "total_steps": 28254, "loss": 0.797, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.943891262954083e-05, "epoch": 0.14, "percentage": 6.76, "elapsed_time": "3:18:04", "remaining_time": "1 day, 21:32:01"} +{"current_steps": 1920, "total_steps": 28254, "loss": 0.8146, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9433041272936734e-05, "epoch": 0.14, "percentage": 6.8, "elapsed_time": "3:19:07", "remaining_time": "1 day, 21:31:04"} +{"current_steps": 1930, "total_steps": 28254, "loss": 0.8237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.942713970865435e-05, "epoch": 0.14, "percentage": 6.83, "elapsed_time": "3:20:08", "remaining_time": "1 day, 21:29:49"} +{"current_steps": 1940, "total_steps": 28254, "loss": 0.7953, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.942120794399002e-05, "epoch": 0.14, "percentage": 6.87, "elapsed_time": "3:21:10", "remaining_time": "1 day, 21:28:43"} +{"current_steps": 1950, "total_steps": 28254, "loss": 0.8066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9415245986277483e-05, "epoch": 0.14, "percentage": 6.9, "elapsed_time": "3:22:11", "remaining_time": "1 day, 21:27:23"} +{"current_steps": 1960, "total_steps": 28254, "loss": 0.8232, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.940925384288775e-05, "epoch": 0.14, "percentage": 6.94, "elapsed_time": "3:23:14", "remaining_time": "1 day, 21:26:26"} +{"current_steps": 1970, "total_steps": 28254, "loss": 0.8156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.940323152122921e-05, "epoch": 0.14, "percentage": 6.97, "elapsed_time": "3:24:16", "remaining_time": "1 day, 21:25:22"} +{"current_steps": 1980, "total_steps": 28254, "loss": 0.8062, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.939717902874751e-05, "epoch": 0.14, "percentage": 7.01, "elapsed_time": "3:25:17", "remaining_time": "1 day, 21:24:15"} +{"current_steps": 1990, "total_steps": 28254, "loss": 0.7818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9391096372925626e-05, "epoch": 0.14, "percentage": 7.04, "elapsed_time": "3:26:19", "remaining_time": "1 day, 21:23:09"} +{"current_steps": 2000, "total_steps": 28254, "loss": 0.8105, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9384983561283824e-05, "epoch": 0.14, "percentage": 7.08, "elapsed_time": "3:27:22", "remaining_time": "1 day, 21:22:11"} +{"current_steps": 2010, "total_steps": 28254, "loss": 0.8112, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.937884060137966e-05, "epoch": 0.14, "percentage": 7.11, "elapsed_time": "3:28:24", "remaining_time": "1 day, 21:21:01"} +{"current_steps": 2020, "total_steps": 28254, "loss": 0.8102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9372667500807944e-05, "epoch": 0.14, "percentage": 7.15, "elapsed_time": "3:29:26", "remaining_time": "1 day, 21:20:02"} +{"current_steps": 2030, "total_steps": 28254, "loss": 0.8369, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9366464267200755e-05, "epoch": 0.14, "percentage": 7.18, "elapsed_time": "3:30:29", "remaining_time": "1 day, 21:19:08"} +{"current_steps": 2040, "total_steps": 28254, "loss": 0.7841, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.936023090822744e-05, "epoch": 0.14, "percentage": 7.22, "elapsed_time": "3:31:30", "remaining_time": "1 day, 21:17:52"} +{"current_steps": 2050, "total_steps": 28254, "loss": 0.8299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.935396743159459e-05, "epoch": 0.15, "percentage": 7.26, "elapsed_time": "3:32:32", "remaining_time": "1 day, 21:16:43"} +{"current_steps": 2060, "total_steps": 28254, "loss": 0.8048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.934767384504602e-05, "epoch": 0.15, "percentage": 7.29, "elapsed_time": "3:33:31", "remaining_time": "1 day, 21:15:10"} +{"current_steps": 2070, "total_steps": 28254, "loss": 0.825, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.934135015636276e-05, "epoch": 0.15, "percentage": 7.33, "elapsed_time": "3:34:34", "remaining_time": "1 day, 21:14:13"} +{"current_steps": 2080, "total_steps": 28254, "loss": 0.7928, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.93349963733631e-05, "epoch": 0.15, "percentage": 7.36, "elapsed_time": "3:35:34", "remaining_time": "1 day, 21:12:42"} +{"current_steps": 2090, "total_steps": 28254, "loss": 0.8016, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9328612503902496e-05, "epoch": 0.15, "percentage": 7.4, "elapsed_time": "3:36:39", "remaining_time": "1 day, 21:12:14"} +{"current_steps": 2100, "total_steps": 28254, "loss": 0.8134, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.932219855587362e-05, "epoch": 0.15, "percentage": 7.43, "elapsed_time": "3:37:43", "remaining_time": "1 day, 21:11:40"} +{"current_steps": 2110, "total_steps": 28254, "loss": 0.8109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.931575453720633e-05, "epoch": 0.15, "percentage": 7.47, "elapsed_time": "3:38:45", "remaining_time": "1 day, 21:10:28"} +{"current_steps": 2120, "total_steps": 28254, "loss": 0.7908, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.930928045586765e-05, "epoch": 0.15, "percentage": 7.5, "elapsed_time": "3:39:47", "remaining_time": "1 day, 21:09:30"} +{"current_steps": 2130, "total_steps": 28254, "loss": 0.7936, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9302776319861785e-05, "epoch": 0.15, "percentage": 7.54, "elapsed_time": "3:40:50", "remaining_time": "1 day, 21:08:30"} +{"current_steps": 2140, "total_steps": 28254, "loss": 0.8008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.92962421372301e-05, "epoch": 0.15, "percentage": 7.57, "elapsed_time": "3:41:52", "remaining_time": "1 day, 21:07:26"} +{"current_steps": 2150, "total_steps": 28254, "loss": 0.8237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.928967791605108e-05, "epoch": 0.15, "percentage": 7.61, "elapsed_time": "3:42:56", "remaining_time": "1 day, 21:06:45"} +{"current_steps": 2160, "total_steps": 28254, "loss": 0.8127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.92830836644404e-05, "epoch": 0.15, "percentage": 7.64, "elapsed_time": "3:44:00", "remaining_time": "1 day, 21:06:06"} +{"current_steps": 2170, "total_steps": 28254, "loss": 0.8168, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9276459390550815e-05, "epoch": 0.15, "percentage": 7.68, "elapsed_time": "3:45:01", "remaining_time": "1 day, 21:04:52"} +{"current_steps": 2180, "total_steps": 28254, "loss": 0.805, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.926980510257222e-05, "epoch": 0.15, "percentage": 7.72, "elapsed_time": "3:46:03", "remaining_time": "1 day, 21:03:52"} +{"current_steps": 2190, "total_steps": 28254, "loss": 0.8125, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.926312080873161e-05, "epoch": 0.16, "percentage": 7.75, "elapsed_time": "3:47:06", "remaining_time": "1 day, 21:02:56"} +{"current_steps": 2200, "total_steps": 28254, "loss": 0.8267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9256406517293085e-05, "epoch": 0.16, "percentage": 7.79, "elapsed_time": "3:48:12", "remaining_time": "1 day, 21:02:33"} +{"current_steps": 2210, "total_steps": 28254, "loss": 0.8405, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.924966223655782e-05, "epoch": 0.16, "percentage": 7.82, "elapsed_time": "3:49:14", "remaining_time": "1 day, 21:01:34"} +{"current_steps": 2220, "total_steps": 28254, "loss": 0.7919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.92428879748641e-05, "epoch": 0.16, "percentage": 7.86, "elapsed_time": "3:50:17", "remaining_time": "1 day, 21:00:43"} +{"current_steps": 2230, "total_steps": 28254, "loss": 0.8398, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.923608374058721e-05, "epoch": 0.16, "percentage": 7.89, "elapsed_time": "3:51:19", "remaining_time": "1 day, 20:59:33"} +{"current_steps": 2240, "total_steps": 28254, "loss": 0.8179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9229249542139576e-05, "epoch": 0.16, "percentage": 7.93, "elapsed_time": "3:52:22", "remaining_time": "1 day, 20:58:37"} +{"current_steps": 2250, "total_steps": 28254, "loss": 0.8156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9222385387970604e-05, "epoch": 0.16, "percentage": 7.96, "elapsed_time": "3:53:23", "remaining_time": "1 day, 20:57:28"} +{"current_steps": 2260, "total_steps": 28254, "loss": 0.8089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.921549128656677e-05, "epoch": 0.16, "percentage": 8.0, "elapsed_time": "3:54:24", "remaining_time": "1 day, 20:56:06"} +{"current_steps": 2270, "total_steps": 28254, "loss": 0.8244, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.920856724645155e-05, "epoch": 0.16, "percentage": 8.03, "elapsed_time": "3:55:27", "remaining_time": "1 day, 20:55:14"} +{"current_steps": 2280, "total_steps": 28254, "loss": 0.8361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.920161327618546e-05, "epoch": 0.16, "percentage": 8.07, "elapsed_time": "3:56:28", "remaining_time": "1 day, 20:53:58"} +{"current_steps": 2290, "total_steps": 28254, "loss": 0.8159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.919462938436602e-05, "epoch": 0.16, "percentage": 8.11, "elapsed_time": "3:57:31", "remaining_time": "1 day, 20:53:05"} +{"current_steps": 2300, "total_steps": 28254, "loss": 0.8104, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.918761557962771e-05, "epoch": 0.16, "percentage": 8.14, "elapsed_time": "3:58:33", "remaining_time": "1 day, 20:51:55"} +{"current_steps": 2310, "total_steps": 28254, "loss": 0.7877, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9180571870642034e-05, "epoch": 0.16, "percentage": 8.18, "elapsed_time": "3:59:35", "remaining_time": "1 day, 20:50:57"} +{"current_steps": 2320, "total_steps": 28254, "loss": 0.7967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.917349826611744e-05, "epoch": 0.16, "percentage": 8.21, "elapsed_time": "4:00:37", "remaining_time": "1 day, 20:49:54"} +{"current_steps": 2330, "total_steps": 28254, "loss": 0.7729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.916639477479935e-05, "epoch": 0.16, "percentage": 8.25, "elapsed_time": "4:01:39", "remaining_time": "1 day, 20:48:45"} +{"current_steps": 2340, "total_steps": 28254, "loss": 0.8578, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.915926140547013e-05, "epoch": 0.17, "percentage": 8.28, "elapsed_time": "4:02:41", "remaining_time": "1 day, 20:47:41"} +{"current_steps": 2350, "total_steps": 28254, "loss": 0.8219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.915209816694908e-05, "epoch": 0.17, "percentage": 8.32, "elapsed_time": "4:03:44", "remaining_time": "1 day, 20:46:43"} +{"current_steps": 2360, "total_steps": 28254, "loss": 0.8145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.914490506809245e-05, "epoch": 0.17, "percentage": 8.35, "elapsed_time": "4:04:46", "remaining_time": "1 day, 20:45:37"} +{"current_steps": 2370, "total_steps": 28254, "loss": 0.8132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9137682117793395e-05, "epoch": 0.17, "percentage": 8.39, "elapsed_time": "4:05:48", "remaining_time": "1 day, 20:44:36"} +{"current_steps": 2380, "total_steps": 28254, "loss": 0.7872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9130429324981963e-05, "epoch": 0.17, "percentage": 8.42, "elapsed_time": "4:06:52", "remaining_time": "1 day, 20:43:52"} +{"current_steps": 2390, "total_steps": 28254, "loss": 0.8177, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9123146698625134e-05, "epoch": 0.17, "percentage": 8.46, "elapsed_time": "4:07:53", "remaining_time": "1 day, 20:42:39"} +{"current_steps": 2400, "total_steps": 28254, "loss": 0.8052, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.911583424772672e-05, "epoch": 0.17, "percentage": 8.49, "elapsed_time": "4:08:56", "remaining_time": "1 day, 20:41:42"} +{"current_steps": 2410, "total_steps": 28254, "loss": 0.7646, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.910849198132747e-05, "epoch": 0.17, "percentage": 8.53, "elapsed_time": "4:09:57", "remaining_time": "1 day, 20:40:30"} +{"current_steps": 2420, "total_steps": 28254, "loss": 0.8199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9101119908504935e-05, "epoch": 0.17, "percentage": 8.57, "elapsed_time": "4:11:00", "remaining_time": "1 day, 20:39:36"} +{"current_steps": 2430, "total_steps": 28254, "loss": 0.7819, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.909371803837355e-05, "epoch": 0.17, "percentage": 8.6, "elapsed_time": "4:11:59", "remaining_time": "1 day, 20:38:00"} +{"current_steps": 2440, "total_steps": 28254, "loss": 0.7957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.908628638008458e-05, "epoch": 0.17, "percentage": 8.64, "elapsed_time": "4:13:01", "remaining_time": "1 day, 20:36:50"} +{"current_steps": 2450, "total_steps": 28254, "loss": 0.8103, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.907882494282614e-05, "epoch": 0.17, "percentage": 8.67, "elapsed_time": "4:14:03", "remaining_time": "1 day, 20:35:48"} +{"current_steps": 2460, "total_steps": 28254, "loss": 0.79, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.907133373582312e-05, "epoch": 0.17, "percentage": 8.71, "elapsed_time": "4:15:04", "remaining_time": "1 day, 20:34:30"} +{"current_steps": 2470, "total_steps": 28254, "loss": 0.8127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9063812768337246e-05, "epoch": 0.17, "percentage": 8.74, "elapsed_time": "4:16:05", "remaining_time": "1 day, 20:33:17"} +{"current_steps": 2480, "total_steps": 28254, "loss": 0.7915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.905626204966705e-05, "epoch": 0.18, "percentage": 8.78, "elapsed_time": "4:17:05", "remaining_time": "1 day, 20:31:51"} +{"current_steps": 2490, "total_steps": 28254, "loss": 0.8207, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.90486815891478e-05, "epoch": 0.18, "percentage": 8.81, "elapsed_time": "4:18:07", "remaining_time": "1 day, 20:30:49"} +{"current_steps": 2500, "total_steps": 28254, "loss": 0.8162, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9041071396151585e-05, "epoch": 0.18, "percentage": 8.85, "elapsed_time": "4:19:08", "remaining_time": "1 day, 20:29:36"} +{"current_steps": 2510, "total_steps": 28254, "loss": 0.8055, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.903343148008722e-05, "epoch": 0.18, "percentage": 8.88, "elapsed_time": "4:20:11", "remaining_time": "1 day, 20:28:44"} +{"current_steps": 2520, "total_steps": 28254, "loss": 0.8019, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9025761850400283e-05, "epoch": 0.18, "percentage": 8.92, "elapsed_time": "4:21:13", "remaining_time": "1 day, 20:27:32"} +{"current_steps": 2530, "total_steps": 28254, "loss": 0.801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9018062516573086e-05, "epoch": 0.18, "percentage": 8.95, "elapsed_time": "4:22:16", "remaining_time": "1 day, 20:26:45"} +{"current_steps": 2540, "total_steps": 28254, "loss": 0.7831, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.901033348812467e-05, "epoch": 0.18, "percentage": 8.99, "elapsed_time": "4:23:17", "remaining_time": "1 day, 20:25:32"} +{"current_steps": 2550, "total_steps": 28254, "loss": 0.794, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9002574774610776e-05, "epoch": 0.18, "percentage": 9.03, "elapsed_time": "4:24:22", "remaining_time": "1 day, 20:24:49"} +{"current_steps": 2560, "total_steps": 28254, "loss": 0.7902, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.899478638562386e-05, "epoch": 0.18, "percentage": 9.06, "elapsed_time": "4:25:25", "remaining_time": "1 day, 20:23:56"} +{"current_steps": 2570, "total_steps": 28254, "loss": 0.785, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8986968330793054e-05, "epoch": 0.18, "percentage": 9.1, "elapsed_time": "4:26:27", "remaining_time": "1 day, 20:22:59"} +{"current_steps": 2580, "total_steps": 28254, "loss": 0.8006, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.897912061978418e-05, "epoch": 0.18, "percentage": 9.13, "elapsed_time": "4:27:33", "remaining_time": "1 day, 20:22:33"} +{"current_steps": 2590, "total_steps": 28254, "loss": 0.8208, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.897124326229972e-05, "epoch": 0.18, "percentage": 9.17, "elapsed_time": "4:28:34", "remaining_time": "1 day, 20:21:19"} +{"current_steps": 2600, "total_steps": 28254, "loss": 0.7793, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.896333626807881e-05, "epoch": 0.18, "percentage": 9.2, "elapsed_time": "4:29:38", "remaining_time": "1 day, 20:20:30"} +{"current_steps": 2610, "total_steps": 28254, "loss": 0.812, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8955399646897215e-05, "epoch": 0.18, "percentage": 9.24, "elapsed_time": "4:30:40", "remaining_time": "1 day, 20:19:25"} +{"current_steps": 2620, "total_steps": 28254, "loss": 0.7948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.894743340856735e-05, "epoch": 0.19, "percentage": 9.27, "elapsed_time": "4:31:42", "remaining_time": "1 day, 20:18:22"} +{"current_steps": 2630, "total_steps": 28254, "loss": 0.7955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.893943756293823e-05, "epoch": 0.19, "percentage": 9.31, "elapsed_time": "4:32:44", "remaining_time": "1 day, 20:17:23"} +{"current_steps": 2640, "total_steps": 28254, "loss": 0.8363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.893141211989549e-05, "epoch": 0.19, "percentage": 9.34, "elapsed_time": "4:33:47", "remaining_time": "1 day, 20:16:23"} +{"current_steps": 2650, "total_steps": 28254, "loss": 0.7986, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.892335708936135e-05, "epoch": 0.19, "percentage": 9.38, "elapsed_time": "4:34:49", "remaining_time": "1 day, 20:15:24"} +{"current_steps": 2660, "total_steps": 28254, "loss": 0.8249, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.89152724812946e-05, "epoch": 0.19, "percentage": 9.41, "elapsed_time": "4:35:51", "remaining_time": "1 day, 20:14:18"} +{"current_steps": 2670, "total_steps": 28254, "loss": 0.7951, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.890715830569062e-05, "epoch": 0.19, "percentage": 9.45, "elapsed_time": "4:36:54", "remaining_time": "1 day, 20:13:17"} +{"current_steps": 2680, "total_steps": 28254, "loss": 0.8098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.889901457258133e-05, "epoch": 0.19, "percentage": 9.49, "elapsed_time": "4:37:56", "remaining_time": "1 day, 20:12:19"} +{"current_steps": 2690, "total_steps": 28254, "loss": 0.7781, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.889084129203519e-05, "epoch": 0.19, "percentage": 9.52, "elapsed_time": "4:38:57", "remaining_time": "1 day, 20:11:00"} +{"current_steps": 2700, "total_steps": 28254, "loss": 0.7817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.888263847415721e-05, "epoch": 0.19, "percentage": 9.56, "elapsed_time": "4:39:59", "remaining_time": "1 day, 20:10:00"} +{"current_steps": 2710, "total_steps": 28254, "loss": 0.7848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.887440612908889e-05, "epoch": 0.19, "percentage": 9.59, "elapsed_time": "4:41:00", "remaining_time": "1 day, 20:08:48"} +{"current_steps": 2720, "total_steps": 28254, "loss": 0.7965, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.886614426700826e-05, "epoch": 0.19, "percentage": 9.63, "elapsed_time": "4:42:02", "remaining_time": "1 day, 20:07:38"} +{"current_steps": 2730, "total_steps": 28254, "loss": 0.8067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8857852898129844e-05, "epoch": 0.19, "percentage": 9.66, "elapsed_time": "4:43:05", "remaining_time": "1 day, 20:06:45"} +{"current_steps": 2740, "total_steps": 28254, "loss": 0.7933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.884953203270463e-05, "epoch": 0.19, "percentage": 9.7, "elapsed_time": "4:44:06", "remaining_time": "1 day, 20:05:35"} +{"current_steps": 2750, "total_steps": 28254, "loss": 0.7918, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.884118168102008e-05, "epoch": 0.19, "percentage": 9.73, "elapsed_time": "4:45:10", "remaining_time": "1 day, 20:04:48"} +{"current_steps": 2760, "total_steps": 28254, "loss": 0.7758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.883280185340011e-05, "epoch": 0.2, "percentage": 9.77, "elapsed_time": "4:46:12", "remaining_time": "1 day, 20:03:46"} +{"current_steps": 2770, "total_steps": 28254, "loss": 0.7765, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8824392560205085e-05, "epoch": 0.2, "percentage": 9.8, "elapsed_time": "4:47:14", "remaining_time": "1 day, 20:02:39"} +{"current_steps": 2780, "total_steps": 28254, "loss": 0.7848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.88159538118318e-05, "epoch": 0.2, "percentage": 9.84, "elapsed_time": "4:48:15", "remaining_time": "1 day, 20:01:21"} +{"current_steps": 2790, "total_steps": 28254, "loss": 0.7852, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8807485618713463e-05, "epoch": 0.2, "percentage": 9.87, "elapsed_time": "4:49:17", "remaining_time": "1 day, 20:00:19"} +{"current_steps": 2800, "total_steps": 28254, "loss": 0.8201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8798987991319686e-05, "epoch": 0.2, "percentage": 9.91, "elapsed_time": "4:50:19", "remaining_time": "1 day, 19:59:12"} +{"current_steps": 2810, "total_steps": 28254, "loss": 0.8024, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.879046094015646e-05, "epoch": 0.2, "percentage": 9.95, "elapsed_time": "4:51:21", "remaining_time": "1 day, 19:58:08"} +{"current_steps": 2820, "total_steps": 28254, "loss": 0.7921, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8781904475766174e-05, "epoch": 0.2, "percentage": 9.98, "elapsed_time": "4:52:22", "remaining_time": "1 day, 19:57:01"} +{"current_steps": 2830, "total_steps": 28254, "loss": 0.7541, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.877331860872758e-05, "epoch": 0.2, "percentage": 10.02, "elapsed_time": "4:53:24", "remaining_time": "1 day, 19:55:58"} +{"current_steps": 2840, "total_steps": 28254, "loss": 0.7689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.876470334965576e-05, "epoch": 0.2, "percentage": 10.05, "elapsed_time": "4:54:27", "remaining_time": "1 day, 19:54:57"} +{"current_steps": 2850, "total_steps": 28254, "loss": 0.8107, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.875605870920217e-05, "epoch": 0.2, "percentage": 10.09, "elapsed_time": "4:55:31", "remaining_time": "1 day, 19:54:16"} +{"current_steps": 2860, "total_steps": 28254, "loss": 0.7784, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8747384698054546e-05, "epoch": 0.2, "percentage": 10.12, "elapsed_time": "4:56:34", "remaining_time": "1 day, 19:53:21"} +{"current_steps": 2870, "total_steps": 28254, "loss": 0.7825, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.873868132693699e-05, "epoch": 0.2, "percentage": 10.16, "elapsed_time": "4:57:36", "remaining_time": "1 day, 19:52:09"} +{"current_steps": 2880, "total_steps": 28254, "loss": 0.762, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.872994860660985e-05, "epoch": 0.2, "percentage": 10.19, "elapsed_time": "4:58:37", "remaining_time": "1 day, 19:51:01"} +{"current_steps": 2890, "total_steps": 28254, "loss": 0.7719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.872118654786979e-05, "epoch": 0.2, "percentage": 10.23, "elapsed_time": "4:59:39", "remaining_time": "1 day, 19:49:54"} +{"current_steps": 2900, "total_steps": 28254, "loss": 0.8455, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.871239516154976e-05, "epoch": 0.21, "percentage": 10.26, "elapsed_time": "5:00:40", "remaining_time": "1 day, 19:48:46"} +{"current_steps": 2910, "total_steps": 28254, "loss": 0.7819, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.870357445851893e-05, "epoch": 0.21, "percentage": 10.3, "elapsed_time": "5:01:42", "remaining_time": "1 day, 19:47:43"} +{"current_steps": 2920, "total_steps": 28254, "loss": 0.7697, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.869472444968274e-05, "epoch": 0.21, "percentage": 10.33, "elapsed_time": "5:02:43", "remaining_time": "1 day, 19:46:24"} +{"current_steps": 2930, "total_steps": 28254, "loss": 0.7829, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8685845145982866e-05, "epoch": 0.21, "percentage": 10.37, "elapsed_time": "5:03:45", "remaining_time": "1 day, 19:45:23"} +{"current_steps": 2940, "total_steps": 28254, "loss": 0.8084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.867693655839719e-05, "epoch": 0.21, "percentage": 10.41, "elapsed_time": "5:04:47", "remaining_time": "1 day, 19:44:19"} +{"current_steps": 2950, "total_steps": 28254, "loss": 0.8239, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.866799869793979e-05, "epoch": 0.21, "percentage": 10.44, "elapsed_time": "5:05:50", "remaining_time": "1 day, 19:43:20"} +{"current_steps": 2960, "total_steps": 28254, "loss": 0.7885, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8659031575660966e-05, "epoch": 0.21, "percentage": 10.48, "elapsed_time": "5:06:51", "remaining_time": "1 day, 19:42:10"} +{"current_steps": 2970, "total_steps": 28254, "loss": 0.7958, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.865003520264717e-05, "epoch": 0.21, "percentage": 10.51, "elapsed_time": "5:07:52", "remaining_time": "1 day, 19:40:59"} +{"current_steps": 2980, "total_steps": 28254, "loss": 0.7812, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8641009590021035e-05, "epoch": 0.21, "percentage": 10.55, "elapsed_time": "5:08:57", "remaining_time": "1 day, 19:40:22"} +{"current_steps": 2990, "total_steps": 28254, "loss": 0.8139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8631954748941327e-05, "epoch": 0.21, "percentage": 10.58, "elapsed_time": "5:09:58", "remaining_time": "1 day, 19:39:08"} +{"current_steps": 3000, "total_steps": 28254, "loss": 0.7709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.862287069060296e-05, "epoch": 0.21, "percentage": 10.62, "elapsed_time": "5:11:00", "remaining_time": "1 day, 19:38:00"} +{"current_steps": 3010, "total_steps": 28254, "loss": 0.8124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.861375742623697e-05, "epoch": 0.21, "percentage": 10.65, "elapsed_time": "5:12:01", "remaining_time": "1 day, 19:36:47"} +{"current_steps": 3020, "total_steps": 28254, "loss": 0.8168, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.860461496711049e-05, "epoch": 0.21, "percentage": 10.69, "elapsed_time": "5:13:04", "remaining_time": "1 day, 19:35:54"} +{"current_steps": 3030, "total_steps": 28254, "loss": 0.8055, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8595443324526765e-05, "epoch": 0.21, "percentage": 10.72, "elapsed_time": "5:14:04", "remaining_time": "1 day, 19:34:38"} +{"current_steps": 3040, "total_steps": 28254, "loss": 0.7721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.858624250982512e-05, "epoch": 0.22, "percentage": 10.76, "elapsed_time": "5:15:06", "remaining_time": "1 day, 19:33:30"} +{"current_steps": 3050, "total_steps": 28254, "loss": 0.8, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.857701253438093e-05, "epoch": 0.22, "percentage": 10.79, "elapsed_time": "5:16:09", "remaining_time": "1 day, 19:32:35"} +{"current_steps": 3060, "total_steps": 28254, "loss": 0.825, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.856775340960563e-05, "epoch": 0.22, "percentage": 10.83, "elapsed_time": "5:17:12", "remaining_time": "1 day, 19:31:39"} +{"current_steps": 3070, "total_steps": 28254, "loss": 0.8102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.855846514694671e-05, "epoch": 0.22, "percentage": 10.87, "elapsed_time": "5:18:14", "remaining_time": "1 day, 19:30:34"} +{"current_steps": 3080, "total_steps": 28254, "loss": 0.8078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.854914775788766e-05, "epoch": 0.22, "percentage": 10.9, "elapsed_time": "5:19:15", "remaining_time": "1 day, 19:29:26"} +{"current_steps": 3090, "total_steps": 28254, "loss": 0.7921, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.853980125394799e-05, "epoch": 0.22, "percentage": 10.94, "elapsed_time": "5:20:18", "remaining_time": "1 day, 19:28:28"} +{"current_steps": 3100, "total_steps": 28254, "loss": 0.772, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.853042564668321e-05, "epoch": 0.22, "percentage": 10.97, "elapsed_time": "5:21:22", "remaining_time": "1 day, 19:27:39"} +{"current_steps": 3110, "total_steps": 28254, "loss": 0.8153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8521020947684815e-05, "epoch": 0.22, "percentage": 11.01, "elapsed_time": "5:22:25", "remaining_time": "1 day, 19:26:49"} +{"current_steps": 3120, "total_steps": 28254, "loss": 0.7686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8511587168580254e-05, "epoch": 0.22, "percentage": 11.04, "elapsed_time": "5:23:29", "remaining_time": "1 day, 19:26:01"} +{"current_steps": 3130, "total_steps": 28254, "loss": 0.7748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.850212432103294e-05, "epoch": 0.22, "percentage": 11.08, "elapsed_time": "5:24:31", "remaining_time": "1 day, 19:24:51"} +{"current_steps": 3140, "total_steps": 28254, "loss": 0.7876, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8492632416742214e-05, "epoch": 0.22, "percentage": 11.11, "elapsed_time": "5:25:31", "remaining_time": "1 day, 19:23:38"} +{"current_steps": 3150, "total_steps": 28254, "loss": 0.8033, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.848311146744335e-05, "epoch": 0.22, "percentage": 11.15, "elapsed_time": "5:26:34", "remaining_time": "1 day, 19:22:40"} +{"current_steps": 3160, "total_steps": 28254, "loss": 0.7947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.847356148490755e-05, "epoch": 0.22, "percentage": 11.18, "elapsed_time": "5:27:36", "remaining_time": "1 day, 19:21:33"} +{"current_steps": 3170, "total_steps": 28254, "loss": 0.7956, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8463982480941865e-05, "epoch": 0.22, "percentage": 11.22, "elapsed_time": "5:28:37", "remaining_time": "1 day, 19:20:24"} +{"current_steps": 3180, "total_steps": 28254, "loss": 0.8006, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.845437446738926e-05, "epoch": 0.23, "percentage": 11.26, "elapsed_time": "5:29:40", "remaining_time": "1 day, 19:19:26"} +{"current_steps": 3190, "total_steps": 28254, "loss": 0.8075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.844473745612857e-05, "epoch": 0.23, "percentage": 11.29, "elapsed_time": "5:30:41", "remaining_time": "1 day, 19:18:12"} +{"current_steps": 3200, "total_steps": 28254, "loss": 0.795, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8435071459074456e-05, "epoch": 0.23, "percentage": 11.33, "elapsed_time": "5:31:40", "remaining_time": "1 day, 19:16:52"} +{"current_steps": 3210, "total_steps": 28254, "loss": 0.7916, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.842537648817743e-05, "epoch": 0.23, "percentage": 11.36, "elapsed_time": "5:32:42", "remaining_time": "1 day, 19:15:45"} +{"current_steps": 3220, "total_steps": 28254, "loss": 0.7825, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.841565255542384e-05, "epoch": 0.23, "percentage": 11.4, "elapsed_time": "5:33:43", "remaining_time": "1 day, 19:14:37"} +{"current_steps": 3230, "total_steps": 28254, "loss": 0.8057, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.84058996728358e-05, "epoch": 0.23, "percentage": 11.43, "elapsed_time": "5:34:47", "remaining_time": "1 day, 19:13:46"} +{"current_steps": 3240, "total_steps": 28254, "loss": 0.7943, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.839611785247125e-05, "epoch": 0.23, "percentage": 11.47, "elapsed_time": "5:35:52", "remaining_time": "1 day, 19:13:04"} +{"current_steps": 3250, "total_steps": 28254, "loss": 0.8024, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8386307106423924e-05, "epoch": 0.23, "percentage": 11.5, "elapsed_time": "5:36:56", "remaining_time": "1 day, 19:12:15"} +{"current_steps": 3260, "total_steps": 28254, "loss": 0.7555, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8376467446823266e-05, "epoch": 0.23, "percentage": 11.54, "elapsed_time": "5:37:58", "remaining_time": "1 day, 19:11:15"} +{"current_steps": 3270, "total_steps": 28254, "loss": 0.7957, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8366598885834496e-05, "epoch": 0.23, "percentage": 11.57, "elapsed_time": "5:39:01", "remaining_time": "1 day, 19:10:13"} +{"current_steps": 3280, "total_steps": 28254, "loss": 0.7763, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.835670143565857e-05, "epoch": 0.23, "percentage": 11.61, "elapsed_time": "5:40:01", "remaining_time": "1 day, 19:08:54"} +{"current_steps": 3290, "total_steps": 28254, "loss": 0.8111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.834677510853216e-05, "epoch": 0.23, "percentage": 11.64, "elapsed_time": "5:41:01", "remaining_time": "1 day, 19:07:41"} +{"current_steps": 3300, "total_steps": 28254, "loss": 0.764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8336819916727624e-05, "epoch": 0.23, "percentage": 11.68, "elapsed_time": "5:42:01", "remaining_time": "1 day, 19:06:21"} +{"current_steps": 3310, "total_steps": 28254, "loss": 0.7501, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.832683587255302e-05, "epoch": 0.23, "percentage": 11.72, "elapsed_time": "5:43:02", "remaining_time": "1 day, 19:05:11"} +{"current_steps": 3320, "total_steps": 28254, "loss": 0.8185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.831682298835208e-05, "epoch": 0.23, "percentage": 11.75, "elapsed_time": "5:44:05", "remaining_time": "1 day, 19:04:11"} +{"current_steps": 3330, "total_steps": 28254, "loss": 0.7918, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8306781276504186e-05, "epoch": 0.24, "percentage": 11.79, "elapsed_time": "5:45:09", "remaining_time": "1 day, 19:03:23"} +{"current_steps": 3340, "total_steps": 28254, "loss": 0.8076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8296710749424355e-05, "epoch": 0.24, "percentage": 11.82, "elapsed_time": "5:46:09", "remaining_time": "1 day, 19:02:08"} +{"current_steps": 3350, "total_steps": 28254, "loss": 0.8178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.828661141956325e-05, "epoch": 0.24, "percentage": 11.86, "elapsed_time": "5:47:09", "remaining_time": "1 day, 19:00:48"} +{"current_steps": 3360, "total_steps": 28254, "loss": 0.8239, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8276483299407124e-05, "epoch": 0.24, "percentage": 11.89, "elapsed_time": "5:48:12", "remaining_time": "1 day, 18:59:48"} +{"current_steps": 3370, "total_steps": 28254, "loss": 0.7565, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.826632640147783e-05, "epoch": 0.24, "percentage": 11.93, "elapsed_time": "5:49:12", "remaining_time": "1 day, 18:58:35"} +{"current_steps": 3380, "total_steps": 28254, "loss": 0.8099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.82561407383328e-05, "epoch": 0.24, "percentage": 11.96, "elapsed_time": "5:50:14", "remaining_time": "1 day, 18:57:29"} +{"current_steps": 3390, "total_steps": 28254, "loss": 0.7945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.824592632256504e-05, "epoch": 0.24, "percentage": 12.0, "elapsed_time": "5:51:16", "remaining_time": "1 day, 18:56:22"} +{"current_steps": 3400, "total_steps": 28254, "loss": 0.7583, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.823568316680309e-05, "epoch": 0.24, "percentage": 12.03, "elapsed_time": "5:52:17", "remaining_time": "1 day, 18:55:17"} +{"current_steps": 3410, "total_steps": 28254, "loss": 0.8081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.822541128371104e-05, "epoch": 0.24, "percentage": 12.07, "elapsed_time": "5:53:18", "remaining_time": "1 day, 18:54:05"} +{"current_steps": 3420, "total_steps": 28254, "loss": 0.7955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.821511068598846e-05, "epoch": 0.24, "percentage": 12.1, "elapsed_time": "5:54:23", "remaining_time": "1 day, 18:53:22"} +{"current_steps": 3430, "total_steps": 28254, "loss": 0.7948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.820478138637048e-05, "epoch": 0.24, "percentage": 12.14, "elapsed_time": "5:55:24", "remaining_time": "1 day, 18:52:12"} +{"current_steps": 3440, "total_steps": 28254, "loss": 0.7969, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8194423397627654e-05, "epoch": 0.24, "percentage": 12.18, "elapsed_time": "5:56:26", "remaining_time": "1 day, 18:51:09"} +{"current_steps": 3450, "total_steps": 28254, "loss": 0.7719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.818403673256604e-05, "epoch": 0.24, "percentage": 12.21, "elapsed_time": "5:57:30", "remaining_time": "1 day, 18:50:20"} +{"current_steps": 3460, "total_steps": 28254, "loss": 0.7689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.817362140402716e-05, "epoch": 0.24, "percentage": 12.25, "elapsed_time": "5:58:32", "remaining_time": "1 day, 18:49:16"} +{"current_steps": 3470, "total_steps": 28254, "loss": 0.7976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.816317742488794e-05, "epoch": 0.25, "percentage": 12.28, "elapsed_time": "5:59:35", "remaining_time": "1 day, 18:48:23"} +{"current_steps": 3480, "total_steps": 28254, "loss": 0.7869, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.815270480806075e-05, "epoch": 0.25, "percentage": 12.32, "elapsed_time": "6:00:36", "remaining_time": "1 day, 18:47:09"} +{"current_steps": 3490, "total_steps": 28254, "loss": 0.8099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.814220356649336e-05, "epoch": 0.25, "percentage": 12.35, "elapsed_time": "6:01:37", "remaining_time": "1 day, 18:46:00"} +{"current_steps": 3500, "total_steps": 28254, "loss": 0.8057, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.813167371316894e-05, "epoch": 0.25, "percentage": 12.39, "elapsed_time": "6:02:40", "remaining_time": "1 day, 18:45:01"} +{"current_steps": 3510, "total_steps": 28254, "loss": 0.764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.812111526110602e-05, "epoch": 0.25, "percentage": 12.42, "elapsed_time": "6:03:43", "remaining_time": "1 day, 18:44:07"} +{"current_steps": 3520, "total_steps": 28254, "loss": 0.7714, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.811052822335849e-05, "epoch": 0.25, "percentage": 12.46, "elapsed_time": "6:04:46", "remaining_time": "1 day, 18:43:06"} +{"current_steps": 3530, "total_steps": 28254, "loss": 0.8108, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8099912613015596e-05, "epoch": 0.25, "percentage": 12.49, "elapsed_time": "6:05:48", "remaining_time": "1 day, 18:42:04"} +{"current_steps": 3540, "total_steps": 28254, "loss": 0.772, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.808926844320189e-05, "epoch": 0.25, "percentage": 12.53, "elapsed_time": "6:06:51", "remaining_time": "1 day, 18:41:06"} +{"current_steps": 3550, "total_steps": 28254, "loss": 0.8022, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.807859572707725e-05, "epoch": 0.25, "percentage": 12.56, "elapsed_time": "6:07:50", "remaining_time": "1 day, 18:39:46"} +{"current_steps": 3560, "total_steps": 28254, "loss": 0.7885, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.806789447783683e-05, "epoch": 0.25, "percentage": 12.6, "elapsed_time": "6:08:51", "remaining_time": "1 day, 18:38:35"} +{"current_steps": 3570, "total_steps": 28254, "loss": 0.7847, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8057164708711064e-05, "epoch": 0.25, "percentage": 12.64, "elapsed_time": "6:09:55", "remaining_time": "1 day, 18:37:45"} +{"current_steps": 3580, "total_steps": 28254, "loss": 0.7756, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.804640643296568e-05, "epoch": 0.25, "percentage": 12.67, "elapsed_time": "6:10:57", "remaining_time": "1 day, 18:36:42"} +{"current_steps": 3590, "total_steps": 28254, "loss": 0.7849, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.80356196639016e-05, "epoch": 0.25, "percentage": 12.71, "elapsed_time": "6:12:00", "remaining_time": "1 day, 18:35:43"} +{"current_steps": 3600, "total_steps": 28254, "loss": 0.8072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8024804414855e-05, "epoch": 0.25, "percentage": 12.74, "elapsed_time": "6:13:03", "remaining_time": "1 day, 18:34:52"} +{"current_steps": 3610, "total_steps": 28254, "loss": 0.7894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.801396069919727e-05, "epoch": 0.26, "percentage": 12.78, "elapsed_time": "6:14:07", "remaining_time": "1 day, 18:33:56"} +{"current_steps": 3620, "total_steps": 28254, "loss": 0.8029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.800308853033498e-05, "epoch": 0.26, "percentage": 12.81, "elapsed_time": "6:15:09", "remaining_time": "1 day, 18:32:59"} +{"current_steps": 3630, "total_steps": 28254, "loss": 0.8059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7992187921709895e-05, "epoch": 0.26, "percentage": 12.85, "elapsed_time": "6:16:11", "remaining_time": "1 day, 18:31:53"} +{"current_steps": 3640, "total_steps": 28254, "loss": 0.7736, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.798125888679893e-05, "epoch": 0.26, "percentage": 12.88, "elapsed_time": "6:17:15", "remaining_time": "1 day, 18:31:05"} +{"current_steps": 3650, "total_steps": 28254, "loss": 0.7819, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7970301439114145e-05, "epoch": 0.26, "percentage": 12.92, "elapsed_time": "6:18:17", "remaining_time": "1 day, 18:30:01"} +{"current_steps": 3660, "total_steps": 28254, "loss": 0.8138, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.795931559220273e-05, "epoch": 0.26, "percentage": 12.95, "elapsed_time": "6:19:20", "remaining_time": "1 day, 18:29:00"} +{"current_steps": 3670, "total_steps": 28254, "loss": 0.7952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.794830135964698e-05, "epoch": 0.26, "percentage": 12.99, "elapsed_time": "6:20:19", "remaining_time": "1 day, 18:27:40"} +{"current_steps": 3680, "total_steps": 28254, "loss": 0.7933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.79372587550643e-05, "epoch": 0.26, "percentage": 13.02, "elapsed_time": "6:21:23", "remaining_time": "1 day, 18:26:46"} +{"current_steps": 3690, "total_steps": 28254, "loss": 0.7588, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.792618779210716e-05, "epoch": 0.26, "percentage": 13.06, "elapsed_time": "6:22:24", "remaining_time": "1 day, 18:25:41"} +{"current_steps": 3700, "total_steps": 28254, "loss": 0.788, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.79150884844631e-05, "epoch": 0.26, "percentage": 13.1, "elapsed_time": "6:23:26", "remaining_time": "1 day, 18:24:33"} +{"current_steps": 3710, "total_steps": 28254, "loss": 0.7668, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.790396084585469e-05, "epoch": 0.26, "percentage": 13.13, "elapsed_time": "6:24:27", "remaining_time": "1 day, 18:23:26"} +{"current_steps": 3720, "total_steps": 28254, "loss": 0.7863, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7892804890039535e-05, "epoch": 0.26, "percentage": 13.17, "elapsed_time": "6:25:29", "remaining_time": "1 day, 18:22:24"} +{"current_steps": 3730, "total_steps": 28254, "loss": 0.8216, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.788162063081025e-05, "epoch": 0.26, "percentage": 13.2, "elapsed_time": "6:26:33", "remaining_time": "1 day, 18:21:35"} +{"current_steps": 3740, "total_steps": 28254, "loss": 0.7619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.787040808199445e-05, "epoch": 0.26, "percentage": 13.24, "elapsed_time": "6:27:36", "remaining_time": "1 day, 18:20:36"} +{"current_steps": 3750, "total_steps": 28254, "loss": 0.7967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.785916725745471e-05, "epoch": 0.27, "percentage": 13.27, "elapsed_time": "6:28:37", "remaining_time": "1 day, 18:19:26"} +{"current_steps": 3760, "total_steps": 28254, "loss": 0.793, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.784789817108858e-05, "epoch": 0.27, "percentage": 13.31, "elapsed_time": "6:29:40", "remaining_time": "1 day, 18:18:26"} +{"current_steps": 3770, "total_steps": 28254, "loss": 0.7863, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.783660083682853e-05, "epoch": 0.27, "percentage": 13.34, "elapsed_time": "6:30:43", "remaining_time": "1 day, 18:17:32"} +{"current_steps": 3780, "total_steps": 28254, "loss": 0.7362, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7825275268641984e-05, "epoch": 0.27, "percentage": 13.38, "elapsed_time": "6:31:45", "remaining_time": "1 day, 18:16:26"} +{"current_steps": 3790, "total_steps": 28254, "loss": 0.7477, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.781392148053124e-05, "epoch": 0.27, "percentage": 13.41, "elapsed_time": "6:32:49", "remaining_time": "1 day, 18:15:35"} +{"current_steps": 3800, "total_steps": 28254, "loss": 0.7581, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.780253948653352e-05, "epoch": 0.27, "percentage": 13.45, "elapsed_time": "6:33:51", "remaining_time": "1 day, 18:14:34"} +{"current_steps": 3810, "total_steps": 28254, "loss": 0.7883, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.779112930072087e-05, "epoch": 0.27, "percentage": 13.48, "elapsed_time": "6:34:53", "remaining_time": "1 day, 18:13:30"} +{"current_steps": 3820, "total_steps": 28254, "loss": 0.7659, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7779690937200254e-05, "epoch": 0.27, "percentage": 13.52, "elapsed_time": "6:35:54", "remaining_time": "1 day, 18:12:21"} +{"current_steps": 3830, "total_steps": 28254, "loss": 0.7475, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7768224410113424e-05, "epoch": 0.27, "percentage": 13.56, "elapsed_time": "6:36:58", "remaining_time": "1 day, 18:11:30"} +{"current_steps": 3840, "total_steps": 28254, "loss": 0.7468, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7756729733636976e-05, "epoch": 0.27, "percentage": 13.59, "elapsed_time": "6:38:02", "remaining_time": "1 day, 18:10:37"} +{"current_steps": 3850, "total_steps": 28254, "loss": 0.7625, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.774520692198228e-05, "epoch": 0.27, "percentage": 13.63, "elapsed_time": "6:39:05", "remaining_time": "1 day, 18:09:44"} +{"current_steps": 3860, "total_steps": 28254, "loss": 0.7745, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7733655989395533e-05, "epoch": 0.27, "percentage": 13.66, "elapsed_time": "6:40:08", "remaining_time": "1 day, 18:08:47"} +{"current_steps": 3870, "total_steps": 28254, "loss": 0.7741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.772207695015767e-05, "epoch": 0.27, "percentage": 13.7, "elapsed_time": "6:41:10", "remaining_time": "1 day, 18:07:43"} +{"current_steps": 3880, "total_steps": 28254, "loss": 0.7774, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.771046981858439e-05, "epoch": 0.27, "percentage": 13.73, "elapsed_time": "6:42:11", "remaining_time": "1 day, 18:06:32"} +{"current_steps": 3890, "total_steps": 28254, "loss": 0.7632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.76988346090261e-05, "epoch": 0.28, "percentage": 13.77, "elapsed_time": "6:43:13", "remaining_time": "1 day, 18:05:29"} +{"current_steps": 3900, "total_steps": 28254, "loss": 0.7729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.768717133586795e-05, "epoch": 0.28, "percentage": 13.8, "elapsed_time": "6:44:16", "remaining_time": "1 day, 18:04:32"} +{"current_steps": 3910, "total_steps": 28254, "loss": 0.7626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.767548001352978e-05, "epoch": 0.28, "percentage": 13.84, "elapsed_time": "6:45:19", "remaining_time": "1 day, 18:03:33"} +{"current_steps": 3920, "total_steps": 28254, "loss": 0.771, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7663760656466085e-05, "epoch": 0.28, "percentage": 13.87, "elapsed_time": "6:46:20", "remaining_time": "1 day, 18:02:26"} +{"current_steps": 3930, "total_steps": 28254, "loss": 0.7865, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.765201327916605e-05, "epoch": 0.28, "percentage": 13.91, "elapsed_time": "6:47:24", "remaining_time": "1 day, 18:01:32"} +{"current_steps": 3940, "total_steps": 28254, "loss": 0.7758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.764023789615349e-05, "epoch": 0.28, "percentage": 13.94, "elapsed_time": "6:48:25", "remaining_time": "1 day, 18:00:26"} +{"current_steps": 3950, "total_steps": 28254, "loss": 0.7699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7628434521986845e-05, "epoch": 0.28, "percentage": 13.98, "elapsed_time": "6:49:29", "remaining_time": "1 day, 17:59:31"} +{"current_steps": 3960, "total_steps": 28254, "loss": 0.7967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.761660317125917e-05, "epoch": 0.28, "percentage": 14.02, "elapsed_time": "6:50:30", "remaining_time": "1 day, 17:58:26"} +{"current_steps": 3970, "total_steps": 28254, "loss": 0.767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.760474385859808e-05, "epoch": 0.28, "percentage": 14.05, "elapsed_time": "6:51:32", "remaining_time": "1 day, 17:57:21"} +{"current_steps": 3980, "total_steps": 28254, "loss": 0.8021, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.75928565986658e-05, "epoch": 0.28, "percentage": 14.09, "elapsed_time": "6:52:34", "remaining_time": "1 day, 17:56:16"} +{"current_steps": 3990, "total_steps": 28254, "loss": 0.7811, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7580941406159084e-05, "epoch": 0.28, "percentage": 14.12, "elapsed_time": "6:53:34", "remaining_time": "1 day, 17:55:04"} +{"current_steps": 4000, "total_steps": 28254, "loss": 0.773, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.756899829580923e-05, "epoch": 0.28, "percentage": 14.16, "elapsed_time": "6:54:37", "remaining_time": "1 day, 17:54:07"} +{"current_steps": 4010, "total_steps": 28254, "loss": 0.7848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.755702728238204e-05, "epoch": 0.28, "percentage": 14.19, "elapsed_time": "6:55:40", "remaining_time": "1 day, 17:53:06"} +{"current_steps": 4020, "total_steps": 28254, "loss": 0.7723, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.754502838067782e-05, "epoch": 0.28, "percentage": 14.23, "elapsed_time": "6:56:43", "remaining_time": "1 day, 17:52:12"} +{"current_steps": 4030, "total_steps": 28254, "loss": 0.7581, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.753300160553136e-05, "epoch": 0.29, "percentage": 14.26, "elapsed_time": "6:57:45", "remaining_time": "1 day, 17:51:07"} +{"current_steps": 4040, "total_steps": 28254, "loss": 0.8092, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.752094697181192e-05, "epoch": 0.29, "percentage": 14.3, "elapsed_time": "6:58:47", "remaining_time": "1 day, 17:50:03"} +{"current_steps": 4050, "total_steps": 28254, "loss": 0.7962, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.750886449442318e-05, "epoch": 0.29, "percentage": 14.33, "elapsed_time": "6:59:49", "remaining_time": "1 day, 17:48:56"} +{"current_steps": 4060, "total_steps": 28254, "loss": 0.7947, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.749675418830325e-05, "epoch": 0.29, "percentage": 14.37, "elapsed_time": "7:00:50", "remaining_time": "1 day, 17:47:49"} +{"current_steps": 4070, "total_steps": 28254, "loss": 0.7743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7484616068424656e-05, "epoch": 0.29, "percentage": 14.41, "elapsed_time": "7:01:52", "remaining_time": "1 day, 17:46:48"} +{"current_steps": 4080, "total_steps": 28254, "loss": 0.7677, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7472450149794314e-05, "epoch": 0.29, "percentage": 14.44, "elapsed_time": "7:02:54", "remaining_time": "1 day, 17:45:41"} +{"current_steps": 4090, "total_steps": 28254, "loss": 0.7854, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7460256447453486e-05, "epoch": 0.29, "percentage": 14.48, "elapsed_time": "7:03:59", "remaining_time": "1 day, 17:44:58"} +{"current_steps": 4100, "total_steps": 28254, "loss": 0.7867, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.744803497647782e-05, "epoch": 0.29, "percentage": 14.51, "elapsed_time": "7:05:02", "remaining_time": "1 day, 17:43:58"} +{"current_steps": 4110, "total_steps": 28254, "loss": 0.7568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.743578575197726e-05, "epoch": 0.29, "percentage": 14.55, "elapsed_time": "7:06:06", "remaining_time": "1 day, 17:43:09"} +{"current_steps": 4120, "total_steps": 28254, "loss": 0.7739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.742350878909608e-05, "epoch": 0.29, "percentage": 14.58, "elapsed_time": "7:07:09", "remaining_time": "1 day, 17:42:12"} +{"current_steps": 4130, "total_steps": 28254, "loss": 0.8267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.741120410301286e-05, "epoch": 0.29, "percentage": 14.62, "elapsed_time": "7:08:10", "remaining_time": "1 day, 17:41:03"} +{"current_steps": 4140, "total_steps": 28254, "loss": 0.7795, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7398871708940426e-05, "epoch": 0.29, "percentage": 14.65, "elapsed_time": "7:09:15", "remaining_time": "1 day, 17:40:17"} +{"current_steps": 4150, "total_steps": 28254, "loss": 0.7619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.738651162212589e-05, "epoch": 0.29, "percentage": 14.69, "elapsed_time": "7:10:16", "remaining_time": "1 day, 17:39:09"} +{"current_steps": 4160, "total_steps": 28254, "loss": 0.7704, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7374123857850575e-05, "epoch": 0.29, "percentage": 14.72, "elapsed_time": "7:11:19", "remaining_time": "1 day, 17:38:07"} +{"current_steps": 4170, "total_steps": 28254, "loss": 0.7591, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.736170843143004e-05, "epoch": 0.3, "percentage": 14.76, "elapsed_time": "7:12:19", "remaining_time": "1 day, 17:36:52"} +{"current_steps": 4180, "total_steps": 28254, "loss": 0.7845, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7349265358214043e-05, "epoch": 0.3, "percentage": 14.79, "elapsed_time": "7:13:21", "remaining_time": "1 day, 17:35:50"} +{"current_steps": 4190, "total_steps": 28254, "loss": 0.7719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7336794653586534e-05, "epoch": 0.3, "percentage": 14.83, "elapsed_time": "7:14:22", "remaining_time": "1 day, 17:34:41"} +{"current_steps": 4200, "total_steps": 28254, "loss": 0.7608, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.732429633296558e-05, "epoch": 0.3, "percentage": 14.87, "elapsed_time": "7:15:22", "remaining_time": "1 day, 17:33:24"} +{"current_steps": 4210, "total_steps": 28254, "loss": 0.758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.731177041180346e-05, "epoch": 0.3, "percentage": 14.9, "elapsed_time": "7:16:24", "remaining_time": "1 day, 17:32:26"} +{"current_steps": 4220, "total_steps": 28254, "loss": 0.7861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7299216905586505e-05, "epoch": 0.3, "percentage": 14.94, "elapsed_time": "7:17:27", "remaining_time": "1 day, 17:31:27"} +{"current_steps": 4230, "total_steps": 28254, "loss": 0.7758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.72866358298352e-05, "epoch": 0.3, "percentage": 14.97, "elapsed_time": "7:18:28", "remaining_time": "1 day, 17:30:17"} +{"current_steps": 4240, "total_steps": 28254, "loss": 0.7504, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.72740272001041e-05, "epoch": 0.3, "percentage": 15.01, "elapsed_time": "7:19:32", "remaining_time": "1 day, 17:29:26"} +{"current_steps": 4250, "total_steps": 28254, "loss": 0.7682, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.726139103198183e-05, "epoch": 0.3, "percentage": 15.04, "elapsed_time": "7:20:35", "remaining_time": "1 day, 17:28:29"} +{"current_steps": 4260, "total_steps": 28254, "loss": 0.7687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.724872734109106e-05, "epoch": 0.3, "percentage": 15.08, "elapsed_time": "7:21:36", "remaining_time": "1 day, 17:27:19"} +{"current_steps": 4270, "total_steps": 28254, "loss": 0.7583, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.723603614308847e-05, "epoch": 0.3, "percentage": 15.11, "elapsed_time": "7:22:39", "remaining_time": "1 day, 17:26:21"} +{"current_steps": 4280, "total_steps": 28254, "loss": 0.8159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7223317453664774e-05, "epoch": 0.3, "percentage": 15.15, "elapsed_time": "7:23:41", "remaining_time": "1 day, 17:25:18"} +{"current_steps": 4290, "total_steps": 28254, "loss": 0.7985, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.721057128854467e-05, "epoch": 0.3, "percentage": 15.18, "elapsed_time": "7:24:42", "remaining_time": "1 day, 17:24:10"} +{"current_steps": 4300, "total_steps": 28254, "loss": 0.7919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.719779766348682e-05, "epoch": 0.3, "percentage": 15.22, "elapsed_time": "7:25:48", "remaining_time": "1 day, 17:23:30"} +{"current_steps": 4310, "total_steps": 28254, "loss": 0.7549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7184996594283824e-05, "epoch": 0.31, "percentage": 15.25, "elapsed_time": "7:26:52", "remaining_time": "1 day, 17:22:36"} +{"current_steps": 4320, "total_steps": 28254, "loss": 0.76, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.717216809676224e-05, "epoch": 0.31, "percentage": 15.29, "elapsed_time": "7:27:56", "remaining_time": "1 day, 17:21:41"} +{"current_steps": 4330, "total_steps": 28254, "loss": 0.7879, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.715931218678251e-05, "epoch": 0.31, "percentage": 15.33, "elapsed_time": "7:28:59", "remaining_time": "1 day, 17:20:46"} +{"current_steps": 4340, "total_steps": 28254, "loss": 0.7934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.714642888023899e-05, "epoch": 0.31, "percentage": 15.36, "elapsed_time": "7:30:02", "remaining_time": "1 day, 17:19:49"} +{"current_steps": 4350, "total_steps": 28254, "loss": 0.7648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.71335181930599e-05, "epoch": 0.31, "percentage": 15.4, "elapsed_time": "7:31:02", "remaining_time": "1 day, 17:18:35"} +{"current_steps": 4360, "total_steps": 28254, "loss": 0.758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.712058014120729e-05, "epoch": 0.31, "percentage": 15.43, "elapsed_time": "7:32:06", "remaining_time": "1 day, 17:17:41"} +{"current_steps": 4370, "total_steps": 28254, "loss": 0.8095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.710761474067707e-05, "epoch": 0.31, "percentage": 15.47, "elapsed_time": "7:33:10", "remaining_time": "1 day, 17:16:47"} +{"current_steps": 4380, "total_steps": 28254, "loss": 0.7676, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.709462200749897e-05, "epoch": 0.31, "percentage": 15.5, "elapsed_time": "7:34:11", "remaining_time": "1 day, 17:15:38"} +{"current_steps": 4390, "total_steps": 28254, "loss": 0.7818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.708160195773648e-05, "epoch": 0.31, "percentage": 15.54, "elapsed_time": "7:35:14", "remaining_time": "1 day, 17:14:39"} +{"current_steps": 4400, "total_steps": 28254, "loss": 0.7766, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7068554607486866e-05, "epoch": 0.31, "percentage": 15.57, "elapsed_time": "7:36:15", "remaining_time": "1 day, 17:13:31"} +{"current_steps": 4410, "total_steps": 28254, "loss": 0.7824, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.705547997288118e-05, "epoch": 0.31, "percentage": 15.61, "elapsed_time": "7:37:19", "remaining_time": "1 day, 17:12:41"} +{"current_steps": 4420, "total_steps": 28254, "loss": 0.7713, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.704237807008418e-05, "epoch": 0.31, "percentage": 15.64, "elapsed_time": "7:38:20", "remaining_time": "1 day, 17:11:29"} +{"current_steps": 4430, "total_steps": 28254, "loss": 0.7972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.702924891529434e-05, "epoch": 0.31, "percentage": 15.68, "elapsed_time": "7:39:19", "remaining_time": "1 day, 17:10:14"} +{"current_steps": 4440, "total_steps": 28254, "loss": 0.766, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.701609252474384e-05, "epoch": 0.31, "percentage": 15.71, "elapsed_time": "7:40:21", "remaining_time": "1 day, 17:09:07"} +{"current_steps": 4450, "total_steps": 28254, "loss": 0.7817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7002908914698505e-05, "epoch": 0.31, "percentage": 15.75, "elapsed_time": "7:41:22", "remaining_time": "1 day, 17:08:02"} +{"current_steps": 4460, "total_steps": 28254, "loss": 0.7626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.698969810145786e-05, "epoch": 0.32, "percentage": 15.79, "elapsed_time": "7:42:25", "remaining_time": "1 day, 17:06:59"} +{"current_steps": 4470, "total_steps": 28254, "loss": 0.8012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6976460101355004e-05, "epoch": 0.32, "percentage": 15.82, "elapsed_time": "7:43:27", "remaining_time": "1 day, 17:05:58"} +{"current_steps": 4480, "total_steps": 28254, "loss": 0.7746, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.696319493075668e-05, "epoch": 0.32, "percentage": 15.86, "elapsed_time": "7:44:29", "remaining_time": "1 day, 17:04:56"} +{"current_steps": 4490, "total_steps": 28254, "loss": 0.8053, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.694990260606324e-05, "epoch": 0.32, "percentage": 15.89, "elapsed_time": "7:45:32", "remaining_time": "1 day, 17:03:54"} +{"current_steps": 4500, "total_steps": 28254, "loss": 0.7903, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6936583143708586e-05, "epoch": 0.32, "percentage": 15.93, "elapsed_time": "7:46:34", "remaining_time": "1 day, 17:02:53"} +{"current_steps": 4510, "total_steps": 28254, "loss": 0.7562, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.692323656016016e-05, "epoch": 0.32, "percentage": 15.96, "elapsed_time": "7:47:37", "remaining_time": "1 day, 17:01:57"} +{"current_steps": 4520, "total_steps": 28254, "loss": 0.7919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.690986287191895e-05, "epoch": 0.32, "percentage": 16.0, "elapsed_time": "7:48:40", "remaining_time": "1 day, 17:00:55"} +{"current_steps": 4530, "total_steps": 28254, "loss": 0.7616, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.689646209551947e-05, "epoch": 0.32, "percentage": 16.03, "elapsed_time": "7:49:44", "remaining_time": "1 day, 17:00:05"} +{"current_steps": 4540, "total_steps": 28254, "loss": 0.7718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.688303424752969e-05, "epoch": 0.32, "percentage": 16.07, "elapsed_time": "7:50:48", "remaining_time": "1 day, 16:59:13"} +{"current_steps": 4550, "total_steps": 28254, "loss": 0.7858, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6869579344551073e-05, "epoch": 0.32, "percentage": 16.1, "elapsed_time": "7:51:51", "remaining_time": "1 day, 16:58:15"} +{"current_steps": 4560, "total_steps": 28254, "loss": 0.7657, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6856097403218534e-05, "epoch": 0.32, "percentage": 16.14, "elapsed_time": "7:52:54", "remaining_time": "1 day, 16:57:16"} +{"current_steps": 4570, "total_steps": 28254, "loss": 0.7698, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6842588440200405e-05, "epoch": 0.32, "percentage": 16.17, "elapsed_time": "7:53:57", "remaining_time": "1 day, 16:56:19"} +{"current_steps": 4580, "total_steps": 28254, "loss": 0.7716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.682905247219843e-05, "epoch": 0.32, "percentage": 16.21, "elapsed_time": "7:54:59", "remaining_time": "1 day, 16:55:15"} +{"current_steps": 4590, "total_steps": 28254, "loss": 0.7889, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.681548951594774e-05, "epoch": 0.32, "percentage": 16.25, "elapsed_time": "7:56:01", "remaining_time": "1 day, 16:54:12"} +{"current_steps": 4600, "total_steps": 28254, "loss": 0.8046, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.680189958821683e-05, "epoch": 0.33, "percentage": 16.28, "elapsed_time": "7:57:03", "remaining_time": "1 day, 16:53:09"} +{"current_steps": 4610, "total_steps": 28254, "loss": 0.7613, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.678828270580756e-05, "epoch": 0.33, "percentage": 16.32, "elapsed_time": "7:58:07", "remaining_time": "1 day, 16:52:12"} +{"current_steps": 4620, "total_steps": 28254, "loss": 0.7745, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.677463888555508e-05, "epoch": 0.33, "percentage": 16.35, "elapsed_time": "7:59:06", "remaining_time": "1 day, 16:50:55"} +{"current_steps": 4630, "total_steps": 28254, "loss": 0.7697, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6760968144327876e-05, "epoch": 0.33, "percentage": 16.39, "elapsed_time": "8:00:09", "remaining_time": "1 day, 16:49:57"} +{"current_steps": 4640, "total_steps": 28254, "loss": 0.7795, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.674727049902771e-05, "epoch": 0.33, "percentage": 16.42, "elapsed_time": "8:01:11", "remaining_time": "1 day, 16:48:51"} +{"current_steps": 4650, "total_steps": 28254, "loss": 0.7851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6733545966589587e-05, "epoch": 0.33, "percentage": 16.46, "elapsed_time": "8:02:12", "remaining_time": "1 day, 16:47:47"} +{"current_steps": 4660, "total_steps": 28254, "loss": 0.7905, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.671979456398179e-05, "epoch": 0.33, "percentage": 16.49, "elapsed_time": "8:03:14", "remaining_time": "1 day, 16:46:43"} +{"current_steps": 4670, "total_steps": 28254, "loss": 0.7617, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.670601630820578e-05, "epoch": 0.33, "percentage": 16.53, "elapsed_time": "8:04:17", "remaining_time": "1 day, 16:45:42"} +{"current_steps": 4680, "total_steps": 28254, "loss": 0.7769, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6692211216296257e-05, "epoch": 0.33, "percentage": 16.56, "elapsed_time": "8:05:20", "remaining_time": "1 day, 16:44:47"} +{"current_steps": 4690, "total_steps": 28254, "loss": 0.7952, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.667837930532108e-05, "epoch": 0.33, "percentage": 16.6, "elapsed_time": "8:06:23", "remaining_time": "1 day, 16:43:46"} +{"current_steps": 4700, "total_steps": 28254, "loss": 0.803, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.666452059238127e-05, "epoch": 0.33, "percentage": 16.63, "elapsed_time": "8:07:25", "remaining_time": "1 day, 16:42:42"} +{"current_steps": 4710, "total_steps": 28254, "loss": 0.7749, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.665063509461097e-05, "epoch": 0.33, "percentage": 16.67, "elapsed_time": "8:08:29", "remaining_time": "1 day, 16:41:48"} +{"current_steps": 4720, "total_steps": 28254, "loss": 0.7641, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6636722829177466e-05, "epoch": 0.33, "percentage": 16.71, "elapsed_time": "8:09:30", "remaining_time": "1 day, 16:40:42"} +{"current_steps": 4730, "total_steps": 28254, "loss": 0.7548, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6622783813281114e-05, "epoch": 0.33, "percentage": 16.74, "elapsed_time": "8:10:32", "remaining_time": "1 day, 16:39:39"} +{"current_steps": 4740, "total_steps": 28254, "loss": 0.7696, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6608818064155356e-05, "epoch": 0.34, "percentage": 16.78, "elapsed_time": "8:11:33", "remaining_time": "1 day, 16:38:31"} +{"current_steps": 4750, "total_steps": 28254, "loss": 0.8007, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.659482559906669e-05, "epoch": 0.34, "percentage": 16.81, "elapsed_time": "8:12:35", "remaining_time": "1 day, 16:37:27"} +{"current_steps": 4760, "total_steps": 28254, "loss": 0.7548, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.658080643531462e-05, "epoch": 0.34, "percentage": 16.85, "elapsed_time": "8:13:37", "remaining_time": "1 day, 16:36:22"} +{"current_steps": 4770, "total_steps": 28254, "loss": 0.7572, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.656676059023169e-05, "epoch": 0.34, "percentage": 16.88, "elapsed_time": "8:14:39", "remaining_time": "1 day, 16:35:22"} +{"current_steps": 4780, "total_steps": 28254, "loss": 0.7546, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6552688081183405e-05, "epoch": 0.34, "percentage": 16.92, "elapsed_time": "8:15:40", "remaining_time": "1 day, 16:34:13"} +{"current_steps": 4790, "total_steps": 28254, "loss": 0.771, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.653858892556825e-05, "epoch": 0.34, "percentage": 16.95, "elapsed_time": "8:16:43", "remaining_time": "1 day, 16:33:14"} +{"current_steps": 4800, "total_steps": 28254, "loss": 0.7633, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.652446314081765e-05, "epoch": 0.34, "percentage": 16.99, "elapsed_time": "8:17:45", "remaining_time": "1 day, 16:32:10"} +{"current_steps": 4810, "total_steps": 28254, "loss": 0.7614, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.651031074439596e-05, "epoch": 0.34, "percentage": 17.02, "elapsed_time": "8:18:46", "remaining_time": "1 day, 16:31:03"} +{"current_steps": 4820, "total_steps": 28254, "loss": 0.7694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.649613175380043e-05, "epoch": 0.34, "percentage": 17.06, "elapsed_time": "8:19:48", "remaining_time": "1 day, 16:30:00"} +{"current_steps": 4830, "total_steps": 28254, "loss": 0.7628, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.648192618656118e-05, "epoch": 0.34, "percentage": 17.09, "elapsed_time": "8:20:52", "remaining_time": "1 day, 16:29:04"} +{"current_steps": 4840, "total_steps": 28254, "loss": 0.7782, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6467694060241206e-05, "epoch": 0.34, "percentage": 17.13, "elapsed_time": "8:21:55", "remaining_time": "1 day, 16:28:05"} +{"current_steps": 4850, "total_steps": 28254, "loss": 0.7816, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.645343539243633e-05, "epoch": 0.34, "percentage": 17.17, "elapsed_time": "8:22:57", "remaining_time": "1 day, 16:27:04"} +{"current_steps": 4860, "total_steps": 28254, "loss": 0.7886, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.643915020077519e-05, "epoch": 0.34, "percentage": 17.2, "elapsed_time": "8:23:58", "remaining_time": "1 day, 16:25:53"} +{"current_steps": 4870, "total_steps": 28254, "loss": 0.7335, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.642483850291922e-05, "epoch": 0.34, "percentage": 17.24, "elapsed_time": "8:25:02", "remaining_time": "1 day, 16:25:01"} +{"current_steps": 4880, "total_steps": 28254, "loss": 0.7666, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.641050031656262e-05, "epoch": 0.35, "percentage": 17.27, "elapsed_time": "8:26:03", "remaining_time": "1 day, 16:23:54"} +{"current_steps": 4890, "total_steps": 28254, "loss": 0.7764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.639613565943233e-05, "epoch": 0.35, "percentage": 17.31, "elapsed_time": "8:27:05", "remaining_time": "1 day, 16:22:48"} +{"current_steps": 4900, "total_steps": 28254, "loss": 0.7386, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.638174454928805e-05, "epoch": 0.35, "percentage": 17.34, "elapsed_time": "8:28:05", "remaining_time": "1 day, 16:21:38"} +{"current_steps": 4910, "total_steps": 28254, "loss": 0.7629, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.636732700392215e-05, "epoch": 0.35, "percentage": 17.38, "elapsed_time": "8:29:09", "remaining_time": "1 day, 16:20:45"} +{"current_steps": 4920, "total_steps": 28254, "loss": 0.7725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.635288304115969e-05, "epoch": 0.35, "percentage": 17.41, "elapsed_time": "8:30:13", "remaining_time": "1 day, 16:19:48"} +{"current_steps": 4930, "total_steps": 28254, "loss": 0.7857, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.633841267885841e-05, "epoch": 0.35, "percentage": 17.45, "elapsed_time": "8:31:17", "remaining_time": "1 day, 16:18:56"} +{"current_steps": 4940, "total_steps": 28254, "loss": 0.7632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6323915934908665e-05, "epoch": 0.35, "percentage": 17.48, "elapsed_time": "8:32:21", "remaining_time": "1 day, 16:18:00"} +{"current_steps": 4950, "total_steps": 28254, "loss": 0.7667, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.630939282723344e-05, "epoch": 0.35, "percentage": 17.52, "elapsed_time": "8:33:24", "remaining_time": "1 day, 16:17:02"} +{"current_steps": 4960, "total_steps": 28254, "loss": 0.7853, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.629484337378832e-05, "epoch": 0.35, "percentage": 17.56, "elapsed_time": "8:34:27", "remaining_time": "1 day, 16:16:03"} +{"current_steps": 4970, "total_steps": 28254, "loss": 0.7849, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.628026759256145e-05, "epoch": 0.35, "percentage": 17.59, "elapsed_time": "8:35:27", "remaining_time": "1 day, 16:14:53"} +{"current_steps": 4980, "total_steps": 28254, "loss": 0.7754, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.626566550157353e-05, "epoch": 0.35, "percentage": 17.63, "elapsed_time": "8:36:28", "remaining_time": "1 day, 16:13:45"} +{"current_steps": 4990, "total_steps": 28254, "loss": 0.7892, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6251037118877784e-05, "epoch": 0.35, "percentage": 17.66, "elapsed_time": "8:37:30", "remaining_time": "1 day, 16:12:39"} +{"current_steps": 5000, "total_steps": 28254, "loss": 0.7652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.623638246255996e-05, "epoch": 0.35, "percentage": 17.7, "elapsed_time": "8:38:32", "remaining_time": "1 day, 16:11:36"} +{"current_steps": 5010, "total_steps": 28254, "loss": 0.7959, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.622170155073825e-05, "epoch": 0.35, "percentage": 17.73, "elapsed_time": "8:39:34", "remaining_time": "1 day, 16:10:36"} +{"current_steps": 5020, "total_steps": 28254, "loss": 0.7871, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6206994401563355e-05, "epoch": 0.36, "percentage": 17.77, "elapsed_time": "8:40:35", "remaining_time": "1 day, 16:09:27"} +{"current_steps": 5030, "total_steps": 28254, "loss": 0.7697, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6192261033218384e-05, "epoch": 0.36, "percentage": 17.8, "elapsed_time": "8:41:38", "remaining_time": "1 day, 16:08:27"} +{"current_steps": 5040, "total_steps": 28254, "loss": 0.7742, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.617750146391887e-05, "epoch": 0.36, "percentage": 17.84, "elapsed_time": "8:42:40", "remaining_time": "1 day, 16:07:23"} +{"current_steps": 5050, "total_steps": 28254, "loss": 0.775, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.616271571191273e-05, "epoch": 0.36, "percentage": 17.87, "elapsed_time": "8:43:40", "remaining_time": "1 day, 16:06:12"} +{"current_steps": 5060, "total_steps": 28254, "loss": 0.745, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.614790379548027e-05, "epoch": 0.36, "percentage": 17.91, "elapsed_time": "8:44:41", "remaining_time": "1 day, 16:05:02"} +{"current_steps": 5070, "total_steps": 28254, "loss": 0.7829, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.613306573293413e-05, "epoch": 0.36, "percentage": 17.94, "elapsed_time": "8:45:43", "remaining_time": "1 day, 16:04:00"} +{"current_steps": 5080, "total_steps": 28254, "loss": 0.7785, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6118201542619285e-05, "epoch": 0.36, "percentage": 17.98, "elapsed_time": "8:46:45", "remaining_time": "1 day, 16:02:59"} +{"current_steps": 5090, "total_steps": 28254, "loss": 0.8053, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6103311242913016e-05, "epoch": 0.36, "percentage": 18.02, "elapsed_time": "8:47:48", "remaining_time": "1 day, 16:02:00"} +{"current_steps": 5100, "total_steps": 28254, "loss": 0.7801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.608839485222486e-05, "epoch": 0.36, "percentage": 18.05, "elapsed_time": "8:48:53", "remaining_time": "1 day, 16:01:10"} +{"current_steps": 5110, "total_steps": 28254, "loss": 0.8004, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.607345238899663e-05, "epoch": 0.36, "percentage": 18.09, "elapsed_time": "8:49:56", "remaining_time": "1 day, 16:00:12"} +{"current_steps": 5120, "total_steps": 28254, "loss": 0.7903, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.605848387170238e-05, "epoch": 0.36, "percentage": 18.12, "elapsed_time": "8:50:57", "remaining_time": "1 day, 15:59:04"} +{"current_steps": 5130, "total_steps": 28254, "loss": 0.7794, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6043489318848365e-05, "epoch": 0.36, "percentage": 18.16, "elapsed_time": "8:51:57", "remaining_time": "1 day, 15:57:53"} +{"current_steps": 5140, "total_steps": 28254, "loss": 0.7509, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.602846874897303e-05, "epoch": 0.36, "percentage": 18.19, "elapsed_time": "8:53:00", "remaining_time": "1 day, 15:56:52"} +{"current_steps": 5150, "total_steps": 28254, "loss": 0.7748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6013422180646983e-05, "epoch": 0.36, "percentage": 18.23, "elapsed_time": "8:54:03", "remaining_time": "1 day, 15:55:54"} +{"current_steps": 5160, "total_steps": 28254, "loss": 0.762, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5998349632472994e-05, "epoch": 0.37, "percentage": 18.26, "elapsed_time": "8:55:04", "remaining_time": "1 day, 15:54:47"} +{"current_steps": 5170, "total_steps": 28254, "loss": 0.7515, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5983251123085925e-05, "epoch": 0.37, "percentage": 18.3, "elapsed_time": "8:56:06", "remaining_time": "1 day, 15:53:44"} +{"current_steps": 5180, "total_steps": 28254, "loss": 0.7714, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.596812667115275e-05, "epoch": 0.37, "percentage": 18.33, "elapsed_time": "8:57:07", "remaining_time": "1 day, 15:52:35"} +{"current_steps": 5190, "total_steps": 28254, "loss": 0.7723, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.595297629537252e-05, "epoch": 0.37, "percentage": 18.37, "elapsed_time": "8:58:09", "remaining_time": "1 day, 15:51:31"} +{"current_steps": 5200, "total_steps": 28254, "loss": 0.7754, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5937800014476334e-05, "epoch": 0.37, "percentage": 18.4, "elapsed_time": "8:59:10", "remaining_time": "1 day, 15:50:26"} +{"current_steps": 5210, "total_steps": 28254, "loss": 0.7633, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5922597847227316e-05, "epoch": 0.37, "percentage": 18.44, "elapsed_time": "9:00:11", "remaining_time": "1 day, 15:49:17"} +{"current_steps": 5220, "total_steps": 28254, "loss": 0.7812, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5907369812420595e-05, "epoch": 0.37, "percentage": 18.48, "elapsed_time": "9:01:15", "remaining_time": "1 day, 15:48:21"} +{"current_steps": 5230, "total_steps": 28254, "loss": 0.7358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5892115928883274e-05, "epoch": 0.37, "percentage": 18.51, "elapsed_time": "9:02:16", "remaining_time": "1 day, 15:47:16"} +{"current_steps": 5240, "total_steps": 28254, "loss": 0.7895, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5876836215474434e-05, "epoch": 0.37, "percentage": 18.55, "elapsed_time": "9:03:19", "remaining_time": "1 day, 15:46:18"} +{"current_steps": 5250, "total_steps": 28254, "loss": 0.7751, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.586153069108507e-05, "epoch": 0.37, "percentage": 18.58, "elapsed_time": "9:04:20", "remaining_time": "1 day, 15:45:11"} +{"current_steps": 5260, "total_steps": 28254, "loss": 0.7407, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.58461993746381e-05, "epoch": 0.37, "percentage": 18.62, "elapsed_time": "9:05:26", "remaining_time": "1 day, 15:44:23"} +{"current_steps": 5270, "total_steps": 28254, "loss": 0.7787, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.583084228508833e-05, "epoch": 0.37, "percentage": 18.65, "elapsed_time": "9:06:29", "remaining_time": "1 day, 15:43:23"} +{"current_steps": 5280, "total_steps": 28254, "loss": 0.7861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.581545944142243e-05, "epoch": 0.37, "percentage": 18.69, "elapsed_time": "9:07:32", "remaining_time": "1 day, 15:42:25"} +{"current_steps": 5290, "total_steps": 28254, "loss": 0.7661, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.580005086265888e-05, "epoch": 0.37, "percentage": 18.72, "elapsed_time": "9:08:36", "remaining_time": "1 day, 15:41:33"} +{"current_steps": 5300, "total_steps": 28254, "loss": 0.7507, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.578461656784805e-05, "epoch": 0.38, "percentage": 18.76, "elapsed_time": "9:09:38", "remaining_time": "1 day, 15:40:28"} +{"current_steps": 5310, "total_steps": 28254, "loss": 0.7674, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.576915657607202e-05, "epoch": 0.38, "percentage": 18.79, "elapsed_time": "9:10:41", "remaining_time": "1 day, 15:39:27"} +{"current_steps": 5320, "total_steps": 28254, "loss": 0.7532, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.575367090644471e-05, "epoch": 0.38, "percentage": 18.83, "elapsed_time": "9:11:44", "remaining_time": "1 day, 15:38:32"} +{"current_steps": 5330, "total_steps": 28254, "loss": 0.7624, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.573815957811174e-05, "epoch": 0.38, "percentage": 18.86, "elapsed_time": "9:12:47", "remaining_time": "1 day, 15:37:31"} +{"current_steps": 5340, "total_steps": 28254, "loss": 0.8019, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5722622610250466e-05, "epoch": 0.38, "percentage": 18.9, "elapsed_time": "9:13:50", "remaining_time": "1 day, 15:36:32"} +{"current_steps": 5350, "total_steps": 28254, "loss": 0.7635, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.570706002206996e-05, "epoch": 0.38, "percentage": 18.94, "elapsed_time": "9:14:53", "remaining_time": "1 day, 15:35:34"} +{"current_steps": 5360, "total_steps": 28254, "loss": 0.762, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.569147183281095e-05, "epoch": 0.38, "percentage": 18.97, "elapsed_time": "9:15:55", "remaining_time": "1 day, 15:34:29"} +{"current_steps": 5370, "total_steps": 28254, "loss": 0.756, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5675858061745814e-05, "epoch": 0.38, "percentage": 19.01, "elapsed_time": "9:16:58", "remaining_time": "1 day, 15:33:32"} +{"current_steps": 5380, "total_steps": 28254, "loss": 0.7495, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.566021872817858e-05, "epoch": 0.38, "percentage": 19.04, "elapsed_time": "9:17:57", "remaining_time": "1 day, 15:32:16"} +{"current_steps": 5390, "total_steps": 28254, "loss": 0.761, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.564455385144486e-05, "epoch": 0.38, "percentage": 19.08, "elapsed_time": "9:18:59", "remaining_time": "1 day, 15:31:14"} +{"current_steps": 5400, "total_steps": 28254, "loss": 0.753, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.562886345091185e-05, "epoch": 0.38, "percentage": 19.11, "elapsed_time": "9:20:03", "remaining_time": "1 day, 15:30:17"} +{"current_steps": 5410, "total_steps": 28254, "loss": 0.76, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.561314754597831e-05, "epoch": 0.38, "percentage": 19.15, "elapsed_time": "9:21:07", "remaining_time": "1 day, 15:29:23"} +{"current_steps": 5420, "total_steps": 28254, "loss": 0.7307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.559740615607453e-05, "epoch": 0.38, "percentage": 19.18, "elapsed_time": "9:22:10", "remaining_time": "1 day, 15:28:22"} +{"current_steps": 5430, "total_steps": 28254, "loss": 0.7455, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.558163930066229e-05, "epoch": 0.38, "percentage": 19.22, "elapsed_time": "9:23:13", "remaining_time": "1 day, 15:27:26"} +{"current_steps": 5440, "total_steps": 28254, "loss": 0.7863, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.556584699923488e-05, "epoch": 0.39, "percentage": 19.25, "elapsed_time": "9:24:14", "remaining_time": "1 day, 15:26:18"} +{"current_steps": 5450, "total_steps": 28254, "loss": 0.7518, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.555002927131704e-05, "epoch": 0.39, "percentage": 19.29, "elapsed_time": "9:25:16", "remaining_time": "1 day, 15:25:12"} +{"current_steps": 5460, "total_steps": 28254, "loss": 0.735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.553418613646494e-05, "epoch": 0.39, "percentage": 19.32, "elapsed_time": "9:26:18", "remaining_time": "1 day, 15:24:11"} +{"current_steps": 5470, "total_steps": 28254, "loss": 0.7715, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.551831761426617e-05, "epoch": 0.39, "percentage": 19.36, "elapsed_time": "9:27:20", "remaining_time": "1 day, 15:23:07"} +{"current_steps": 5480, "total_steps": 28254, "loss": 0.7423, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5502423724339706e-05, "epoch": 0.39, "percentage": 19.4, "elapsed_time": "9:28:22", "remaining_time": "1 day, 15:22:03"} +{"current_steps": 5490, "total_steps": 28254, "loss": 0.7504, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5486504486335876e-05, "epoch": 0.39, "percentage": 19.43, "elapsed_time": "9:29:24", "remaining_time": "1 day, 15:21:00"} +{"current_steps": 5500, "total_steps": 28254, "loss": 0.7598, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.547055991993638e-05, "epoch": 0.39, "percentage": 19.47, "elapsed_time": "9:30:26", "remaining_time": "1 day, 15:19:58"} +{"current_steps": 5510, "total_steps": 28254, "loss": 0.7517, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5454590044854185e-05, "epoch": 0.39, "percentage": 19.5, "elapsed_time": "9:31:31", "remaining_time": "1 day, 15:19:05"} +{"current_steps": 5520, "total_steps": 28254, "loss": 0.7533, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5438594880833586e-05, "epoch": 0.39, "percentage": 19.54, "elapsed_time": "9:32:33", "remaining_time": "1 day, 15:18:03"} +{"current_steps": 5530, "total_steps": 28254, "loss": 0.7872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5422574447650126e-05, "epoch": 0.39, "percentage": 19.57, "elapsed_time": "9:33:36", "remaining_time": "1 day, 15:17:04"} +{"current_steps": 5540, "total_steps": 28254, "loss": 0.7777, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.540652876511059e-05, "epoch": 0.39, "percentage": 19.61, "elapsed_time": "9:34:39", "remaining_time": "1 day, 15:16:07"} +{"current_steps": 5550, "total_steps": 28254, "loss": 0.7838, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5390457853052994e-05, "epoch": 0.39, "percentage": 19.64, "elapsed_time": "9:35:44", "remaining_time": "1 day, 15:15:13"} +{"current_steps": 5560, "total_steps": 28254, "loss": 0.7678, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5374361731346526e-05, "epoch": 0.39, "percentage": 19.68, "elapsed_time": "9:36:45", "remaining_time": "1 day, 15:14:08"} +{"current_steps": 5570, "total_steps": 28254, "loss": 0.7444, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.535824041989156e-05, "epoch": 0.39, "percentage": 19.71, "elapsed_time": "9:37:48", "remaining_time": "1 day, 15:13:10"} +{"current_steps": 5580, "total_steps": 28254, "loss": 0.7691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.534209393861959e-05, "epoch": 0.39, "percentage": 19.75, "elapsed_time": "9:38:50", "remaining_time": "1 day, 15:12:05"} +{"current_steps": 5590, "total_steps": 28254, "loss": 0.7975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5325922307493274e-05, "epoch": 0.4, "percentage": 19.78, "elapsed_time": "9:39:51", "remaining_time": "1 day, 15:10:56"} +{"current_steps": 5600, "total_steps": 28254, "loss": 0.7718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.530972554650631e-05, "epoch": 0.4, "percentage": 19.82, "elapsed_time": "9:40:53", "remaining_time": "1 day, 15:09:54"} +{"current_steps": 5610, "total_steps": 28254, "loss": 0.7626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.529350367568349e-05, "epoch": 0.4, "percentage": 19.86, "elapsed_time": "9:41:56", "remaining_time": "1 day, 15:08:55"} +{"current_steps": 5620, "total_steps": 28254, "loss": 0.7574, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.527725671508066e-05, "epoch": 0.4, "percentage": 19.89, "elapsed_time": "9:42:58", "remaining_time": "1 day, 15:07:50"} +{"current_steps": 5630, "total_steps": 28254, "loss": 0.7403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5260984684784656e-05, "epoch": 0.4, "percentage": 19.93, "elapsed_time": "9:44:01", "remaining_time": "1 day, 15:06:51"} +{"current_steps": 5640, "total_steps": 28254, "loss": 0.7511, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.524468760491336e-05, "epoch": 0.4, "percentage": 19.96, "elapsed_time": "9:45:02", "remaining_time": "1 day, 15:05:47"} +{"current_steps": 5650, "total_steps": 28254, "loss": 0.7649, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.522836549561556e-05, "epoch": 0.4, "percentage": 20.0, "elapsed_time": "9:46:05", "remaining_time": "1 day, 15:04:48"} +{"current_steps": 5660, "total_steps": 28254, "loss": 0.7782, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5212018377071044e-05, "epoch": 0.4, "percentage": 20.03, "elapsed_time": "9:47:09", "remaining_time": "1 day, 15:03:51"} +{"current_steps": 5670, "total_steps": 28254, "loss": 0.784, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5195646269490475e-05, "epoch": 0.4, "percentage": 20.07, "elapsed_time": "9:48:10", "remaining_time": "1 day, 15:02:43"} +{"current_steps": 5680, "total_steps": 28254, "loss": 0.7662, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.517924919311545e-05, "epoch": 0.4, "percentage": 20.1, "elapsed_time": "9:49:15", "remaining_time": "1 day, 15:01:54"} +{"current_steps": 5690, "total_steps": 28254, "loss": 0.761, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5162827168218413e-05, "epoch": 0.4, "percentage": 20.14, "elapsed_time": "9:50:16", "remaining_time": "1 day, 15:00:46"} +{"current_steps": 5700, "total_steps": 28254, "loss": 0.7609, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5146380215102666e-05, "epoch": 0.4, "percentage": 20.17, "elapsed_time": "9:51:20", "remaining_time": "1 day, 14:59:50"} +{"current_steps": 5710, "total_steps": 28254, "loss": 0.7946, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.512990835410231e-05, "epoch": 0.4, "percentage": 20.21, "elapsed_time": "9:52:22", "remaining_time": "1 day, 14:58:45"} +{"current_steps": 5720, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5113411605582266e-05, "epoch": 0.4, "percentage": 20.24, "elapsed_time": "9:53:24", "remaining_time": "1 day, 14:57:44"} +{"current_steps": 5730, "total_steps": 28254, "loss": 0.7565, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.509688998993821e-05, "epoch": 0.41, "percentage": 20.28, "elapsed_time": "9:54:27", "remaining_time": "1 day, 14:56:46"} +{"current_steps": 5740, "total_steps": 28254, "loss": 0.776, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5080343527596555e-05, "epoch": 0.41, "percentage": 20.32, "elapsed_time": "9:55:29", "remaining_time": "1 day, 14:55:43"} +{"current_steps": 5750, "total_steps": 28254, "loss": 0.779, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.506377223901447e-05, "epoch": 0.41, "percentage": 20.35, "elapsed_time": "9:56:30", "remaining_time": "1 day, 14:54:36"} +{"current_steps": 5760, "total_steps": 28254, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.504717614467977e-05, "epoch": 0.41, "percentage": 20.39, "elapsed_time": "9:57:32", "remaining_time": "1 day, 14:53:33"} +{"current_steps": 5770, "total_steps": 28254, "loss": 0.7812, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5030555265110964e-05, "epoch": 0.41, "percentage": 20.42, "elapsed_time": "9:58:36", "remaining_time": "1 day, 14:52:38"} +{"current_steps": 5780, "total_steps": 28254, "loss": 0.7568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.50139096208572e-05, "epoch": 0.41, "percentage": 20.46, "elapsed_time": "9:59:38", "remaining_time": "1 day, 14:51:33"} +{"current_steps": 5790, "total_steps": 28254, "loss": 0.7773, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.499723923249824e-05, "epoch": 0.41, "percentage": 20.49, "elapsed_time": "10:00:41", "remaining_time": "1 day, 14:50:31"} +{"current_steps": 5800, "total_steps": 28254, "loss": 0.7523, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4980544120644456e-05, "epoch": 0.41, "percentage": 20.53, "elapsed_time": "10:01:43", "remaining_time": "1 day, 14:49:28"} +{"current_steps": 5810, "total_steps": 28254, "loss": 0.748, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4963824305936764e-05, "epoch": 0.41, "percentage": 20.56, "elapsed_time": "10:02:44", "remaining_time": "1 day, 14:48:22"} +{"current_steps": 5820, "total_steps": 28254, "loss": 0.7493, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.494707980904662e-05, "epoch": 0.41, "percentage": 20.6, "elapsed_time": "10:03:47", "remaining_time": "1 day, 14:47:23"} +{"current_steps": 5830, "total_steps": 28254, "loss": 0.7691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4930310650676026e-05, "epoch": 0.41, "percentage": 20.63, "elapsed_time": "10:04:51", "remaining_time": "1 day, 14:46:28"} +{"current_steps": 5840, "total_steps": 28254, "loss": 0.7611, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.491351685155744e-05, "epoch": 0.41, "percentage": 20.67, "elapsed_time": "10:05:53", "remaining_time": "1 day, 14:45:24"} +{"current_steps": 5850, "total_steps": 28254, "loss": 0.7332, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4896698432453804e-05, "epoch": 0.41, "percentage": 20.71, "elapsed_time": "10:06:54", "remaining_time": "1 day, 14:44:17"} +{"current_steps": 5860, "total_steps": 28254, "loss": 0.7486, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.487985541415849e-05, "epoch": 0.41, "percentage": 20.74, "elapsed_time": "10:07:53", "remaining_time": "1 day, 14:43:04"} +{"current_steps": 5870, "total_steps": 28254, "loss": 0.7807, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.486298781749528e-05, "epoch": 0.42, "percentage": 20.78, "elapsed_time": "10:08:57", "remaining_time": "1 day, 14:42:06"} +{"current_steps": 5880, "total_steps": 28254, "loss": 0.7707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.484609566331837e-05, "epoch": 0.42, "percentage": 20.81, "elapsed_time": "10:09:59", "remaining_time": "1 day, 14:41:02"} +{"current_steps": 5890, "total_steps": 28254, "loss": 0.7831, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.482917897251227e-05, "epoch": 0.42, "percentage": 20.85, "elapsed_time": "10:10:59", "remaining_time": "1 day, 14:39:55"} +{"current_steps": 5900, "total_steps": 28254, "loss": 0.7667, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.481223776599188e-05, "epoch": 0.42, "percentage": 20.88, "elapsed_time": "10:12:02", "remaining_time": "1 day, 14:38:55"} +{"current_steps": 5910, "total_steps": 28254, "loss": 0.7681, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.479527206470238e-05, "epoch": 0.42, "percentage": 20.92, "elapsed_time": "10:13:05", "remaining_time": "1 day, 14:37:54"} +{"current_steps": 5920, "total_steps": 28254, "loss": 0.7836, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.47782818896192e-05, "epoch": 0.42, "percentage": 20.95, "elapsed_time": "10:14:07", "remaining_time": "1 day, 14:36:51"} +{"current_steps": 5930, "total_steps": 28254, "loss": 0.7464, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4761267261748106e-05, "epoch": 0.42, "percentage": 20.99, "elapsed_time": "10:15:11", "remaining_time": "1 day, 14:35:55"} +{"current_steps": 5940, "total_steps": 28254, "loss": 0.7858, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.474422820212504e-05, "epoch": 0.42, "percentage": 21.02, "elapsed_time": "10:16:13", "remaining_time": "1 day, 14:34:52"} +{"current_steps": 5950, "total_steps": 28254, "loss": 0.7458, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.472716473181617e-05, "epoch": 0.42, "percentage": 21.06, "elapsed_time": "10:17:16", "remaining_time": "1 day, 14:33:55"} +{"current_steps": 5960, "total_steps": 28254, "loss": 0.7579, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4710076871917825e-05, "epoch": 0.42, "percentage": 21.09, "elapsed_time": "10:18:17", "remaining_time": "1 day, 14:32:45"} +{"current_steps": 5970, "total_steps": 28254, "loss": 0.7861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4692964643556526e-05, "epoch": 0.42, "percentage": 21.13, "elapsed_time": "10:19:20", "remaining_time": "1 day, 14:31:49"} +{"current_steps": 5980, "total_steps": 28254, "loss": 0.7688, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.467582806788887e-05, "epoch": 0.42, "percentage": 21.17, "elapsed_time": "10:20:21", "remaining_time": "1 day, 14:30:40"} +{"current_steps": 5990, "total_steps": 28254, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4658667166101605e-05, "epoch": 0.42, "percentage": 21.2, "elapsed_time": "10:21:23", "remaining_time": "1 day, 14:29:37"} +{"current_steps": 6000, "total_steps": 28254, "loss": 0.7929, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.464148195941152e-05, "epoch": 0.42, "percentage": 21.24, "elapsed_time": "10:22:24", "remaining_time": "1 day, 14:28:29"} +{"current_steps": 6010, "total_steps": 28254, "loss": 0.7441, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.462427246906548e-05, "epoch": 0.43, "percentage": 21.27, "elapsed_time": "10:23:24", "remaining_time": "1 day, 14:27:21"} +{"current_steps": 6020, "total_steps": 28254, "loss": 0.746, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.460703871634035e-05, "epoch": 0.43, "percentage": 21.31, "elapsed_time": "10:24:26", "remaining_time": "1 day, 14:26:17"} +{"current_steps": 6030, "total_steps": 28254, "loss": 0.7437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4589780722542994e-05, "epoch": 0.43, "percentage": 21.34, "elapsed_time": "10:25:29", "remaining_time": "1 day, 14:25:16"} +{"current_steps": 6040, "total_steps": 28254, "loss": 0.7837, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4572498509010275e-05, "epoch": 0.43, "percentage": 21.38, "elapsed_time": "10:26:31", "remaining_time": "1 day, 14:24:15"} +{"current_steps": 6050, "total_steps": 28254, "loss": 0.7534, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4555192097108954e-05, "epoch": 0.43, "percentage": 21.41, "elapsed_time": "10:27:34", "remaining_time": "1 day, 14:23:14"} +{"current_steps": 6060, "total_steps": 28254, "loss": 0.7585, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4537861508235746e-05, "epoch": 0.43, "percentage": 21.45, "elapsed_time": "10:28:37", "remaining_time": "1 day, 14:22:14"} +{"current_steps": 6070, "total_steps": 28254, "loss": 0.7431, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.452050676381725e-05, "epoch": 0.43, "percentage": 21.48, "elapsed_time": "10:29:42", "remaining_time": "1 day, 14:21:23"} +{"current_steps": 6080, "total_steps": 28254, "loss": 0.769, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.450312788530991e-05, "epoch": 0.43, "percentage": 21.52, "elapsed_time": "10:30:44", "remaining_time": "1 day, 14:20:21"} +{"current_steps": 6090, "total_steps": 28254, "loss": 0.7781, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.448572489420003e-05, "epoch": 0.43, "percentage": 21.55, "elapsed_time": "10:31:47", "remaining_time": "1 day, 14:19:20"} +{"current_steps": 6100, "total_steps": 28254, "loss": 0.7682, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4468297812003724e-05, "epoch": 0.43, "percentage": 21.59, "elapsed_time": "10:32:50", "remaining_time": "1 day, 14:18:22"} +{"current_steps": 6110, "total_steps": 28254, "loss": 0.8062, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.445084666026688e-05, "epoch": 0.43, "percentage": 21.63, "elapsed_time": "10:33:54", "remaining_time": "1 day, 14:17:26"} +{"current_steps": 6120, "total_steps": 28254, "loss": 0.7512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.443337146056515e-05, "epoch": 0.43, "percentage": 21.66, "elapsed_time": "10:34:58", "remaining_time": "1 day, 14:16:28"} +{"current_steps": 6130, "total_steps": 28254, "loss": 0.7637, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.441587223450391e-05, "epoch": 0.43, "percentage": 21.7, "elapsed_time": "10:36:02", "remaining_time": "1 day, 14:15:32"} +{"current_steps": 6140, "total_steps": 28254, "loss": 0.7575, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4398349003718257e-05, "epoch": 0.43, "percentage": 21.73, "elapsed_time": "10:37:02", "remaining_time": "1 day, 14:14:22"} +{"current_steps": 6150, "total_steps": 28254, "loss": 0.7549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.438080178987296e-05, "epoch": 0.44, "percentage": 21.77, "elapsed_time": "10:38:04", "remaining_time": "1 day, 14:13:21"} +{"current_steps": 6160, "total_steps": 28254, "loss": 0.7705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.436323061466242e-05, "epoch": 0.44, "percentage": 21.8, "elapsed_time": "10:39:06", "remaining_time": "1 day, 14:12:15"} +{"current_steps": 6170, "total_steps": 28254, "loss": 0.7726, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.434739608795997e-05, "epoch": 0.44, "percentage": 21.84, "elapsed_time": "10:40:07", "remaining_time": "1 day, 14:11:11"} +{"current_steps": 6180, "total_steps": 28254, "loss": 0.7431, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.432977944602969e-05, "epoch": 0.44, "percentage": 21.87, "elapsed_time": "10:41:09", "remaining_time": "1 day, 14:10:08"} +{"current_steps": 6190, "total_steps": 28254, "loss": 0.7338, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.431390403463827e-05, "epoch": 0.44, "percentage": 21.91, "elapsed_time": "10:42:12", "remaining_time": "1 day, 14:09:08"} +{"current_steps": 6200, "total_steps": 28254, "loss": 0.7498, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.429624200461494e-05, "epoch": 0.44, "percentage": 21.94, "elapsed_time": "10:43:12", "remaining_time": "1 day, 14:07:56"} +{"current_steps": 6210, "total_steps": 28254, "loss": 0.7325, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4278556117771474e-05, "epoch": 0.44, "percentage": 21.98, "elapsed_time": "10:44:13", "remaining_time": "1 day, 14:06:51"} +{"current_steps": 6220, "total_steps": 28254, "loss": 0.7703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4260846395973755e-05, "epoch": 0.44, "percentage": 22.01, "elapsed_time": "10:45:15", "remaining_time": "1 day, 14:05:47"} +{"current_steps": 6230, "total_steps": 28254, "loss": 0.7717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.424311286111709e-05, "epoch": 0.44, "percentage": 22.05, "elapsed_time": "10:46:18", "remaining_time": "1 day, 14:04:47"} +{"current_steps": 6240, "total_steps": 28254, "loss": 0.7324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.422535553512627e-05, "epoch": 0.44, "percentage": 22.09, "elapsed_time": "10:47:21", "remaining_time": "1 day, 14:03:47"} +{"current_steps": 6250, "total_steps": 28254, "loss": 0.7564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.420757443995548e-05, "epoch": 0.44, "percentage": 22.12, "elapsed_time": "10:48:23", "remaining_time": "1 day, 14:02:44"} +{"current_steps": 6260, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4189769597588294e-05, "epoch": 0.44, "percentage": 22.16, "elapsed_time": "10:49:25", "remaining_time": "1 day, 14:01:40"} +{"current_steps": 6270, "total_steps": 28254, "loss": 0.7419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.417194103003765e-05, "epoch": 0.44, "percentage": 22.19, "elapsed_time": "10:50:29", "remaining_time": "1 day, 14:00:44"} +{"current_steps": 6280, "total_steps": 28254, "loss": 0.7456, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4154088759345805e-05, "epoch": 0.44, "percentage": 22.23, "elapsed_time": "10:51:33", "remaining_time": "1 day, 13:59:48"} +{"current_steps": 6290, "total_steps": 28254, "loss": 0.7672, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4136212807584345e-05, "epoch": 0.45, "percentage": 22.26, "elapsed_time": "10:52:35", "remaining_time": "1 day, 13:58:46"} +{"current_steps": 6300, "total_steps": 28254, "loss": 0.7548, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.411831319685412e-05, "epoch": 0.45, "percentage": 22.3, "elapsed_time": "10:53:37", "remaining_time": "1 day, 13:57:43"} +{"current_steps": 6310, "total_steps": 28254, "loss": 0.7847, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.410038994928522e-05, "epoch": 0.45, "percentage": 22.33, "elapsed_time": "10:54:40", "remaining_time": "1 day, 13:56:43"} +{"current_steps": 6320, "total_steps": 28254, "loss": 0.7269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.408244308703699e-05, "epoch": 0.45, "percentage": 22.37, "elapsed_time": "10:55:42", "remaining_time": "1 day, 13:55:41"} +{"current_steps": 6330, "total_steps": 28254, "loss": 0.7509, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.406447263229792e-05, "epoch": 0.45, "percentage": 22.4, "elapsed_time": "10:56:45", "remaining_time": "1 day, 13:54:40"} +{"current_steps": 6340, "total_steps": 28254, "loss": 0.749, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4046478607285725e-05, "epoch": 0.45, "percentage": 22.44, "elapsed_time": "10:57:47", "remaining_time": "1 day, 13:53:36"} +{"current_steps": 6350, "total_steps": 28254, "loss": 0.74, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.402846103424722e-05, "epoch": 0.45, "percentage": 22.47, "elapsed_time": "10:58:49", "remaining_time": "1 day, 13:52:35"} +{"current_steps": 6360, "total_steps": 28254, "loss": 0.7405, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.401041993545837e-05, "epoch": 0.45, "percentage": 22.51, "elapsed_time": "10:59:53", "remaining_time": "1 day, 13:51:37"} +{"current_steps": 6370, "total_steps": 28254, "loss": 0.7815, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.399235533322419e-05, "epoch": 0.45, "percentage": 22.55, "elapsed_time": "11:00:55", "remaining_time": "1 day, 13:50:35"} +{"current_steps": 6380, "total_steps": 28254, "loss": 0.7583, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.397426724987876e-05, "epoch": 0.45, "percentage": 22.58, "elapsed_time": "11:01:58", "remaining_time": "1 day, 13:49:36"} +{"current_steps": 6390, "total_steps": 28254, "loss": 0.7438, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3956155707785204e-05, "epoch": 0.45, "percentage": 22.62, "elapsed_time": "11:03:02", "remaining_time": "1 day, 13:48:38"} +{"current_steps": 6400, "total_steps": 28254, "loss": 0.7448, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.393802072933566e-05, "epoch": 0.45, "percentage": 22.65, "elapsed_time": "11:04:04", "remaining_time": "1 day, 13:47:37"} +{"current_steps": 6410, "total_steps": 28254, "loss": 0.7583, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.39198623369512e-05, "epoch": 0.45, "percentage": 22.69, "elapsed_time": "11:05:06", "remaining_time": "1 day, 13:46:32"} +{"current_steps": 6420, "total_steps": 28254, "loss": 0.7528, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.390168055308189e-05, "epoch": 0.45, "percentage": 22.72, "elapsed_time": "11:06:06", "remaining_time": "1 day, 13:45:23"} +{"current_steps": 6430, "total_steps": 28254, "loss": 0.7568, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.388347540020669e-05, "epoch": 0.46, "percentage": 22.76, "elapsed_time": "11:07:10", "remaining_time": "1 day, 13:44:27"} +{"current_steps": 6440, "total_steps": 28254, "loss": 0.7638, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.386524690083343e-05, "epoch": 0.46, "percentage": 22.79, "elapsed_time": "11:08:11", "remaining_time": "1 day, 13:43:21"} +{"current_steps": 6450, "total_steps": 28254, "loss": 0.7391, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3846995077498875e-05, "epoch": 0.46, "percentage": 22.83, "elapsed_time": "11:09:16", "remaining_time": "1 day, 13:42:26"} +{"current_steps": 6460, "total_steps": 28254, "loss": 0.7421, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.382871995276856e-05, "epoch": 0.46, "percentage": 22.86, "elapsed_time": "11:10:17", "remaining_time": "1 day, 13:41:20"} +{"current_steps": 6470, "total_steps": 28254, "loss": 0.7869, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3810421549236845e-05, "epoch": 0.46, "percentage": 22.9, "elapsed_time": "11:11:19", "remaining_time": "1 day, 13:40:18"} +{"current_steps": 6480, "total_steps": 28254, "loss": 0.7767, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.37920998895269e-05, "epoch": 0.46, "percentage": 22.93, "elapsed_time": "11:12:20", "remaining_time": "1 day, 13:39:12"} +{"current_steps": 6490, "total_steps": 28254, "loss": 0.7687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.37737549962906e-05, "epoch": 0.46, "percentage": 22.97, "elapsed_time": "11:13:22", "remaining_time": "1 day, 13:38:09"} +{"current_steps": 6500, "total_steps": 28254, "loss": 0.7374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.375538689220858e-05, "epoch": 0.46, "percentage": 23.01, "elapsed_time": "11:14:26", "remaining_time": "1 day, 13:37:12"} +{"current_steps": 6510, "total_steps": 28254, "loss": 0.7617, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.373699559999017e-05, "epoch": 0.46, "percentage": 23.04, "elapsed_time": "11:15:30", "remaining_time": "1 day, 13:36:14"} +{"current_steps": 6520, "total_steps": 28254, "loss": 0.7686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.371858114237335e-05, "epoch": 0.46, "percentage": 23.08, "elapsed_time": "11:16:34", "remaining_time": "1 day, 13:35:20"} +{"current_steps": 6530, "total_steps": 28254, "loss": 0.739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3700143542124745e-05, "epoch": 0.46, "percentage": 23.11, "elapsed_time": "11:17:36", "remaining_time": "1 day, 13:34:14"} +{"current_steps": 6540, "total_steps": 28254, "loss": 0.7728, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.36816828220396e-05, "epoch": 0.46, "percentage": 23.15, "elapsed_time": "11:18:39", "remaining_time": "1 day, 13:33:17"} +{"current_steps": 6550, "total_steps": 28254, "loss": 0.7622, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3663199004941756e-05, "epoch": 0.46, "percentage": 23.18, "elapsed_time": "11:19:41", "remaining_time": "1 day, 13:32:11"} +{"current_steps": 6560, "total_steps": 28254, "loss": 0.7655, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.364469211368358e-05, "epoch": 0.46, "percentage": 23.22, "elapsed_time": "11:20:44", "remaining_time": "1 day, 13:31:13"} +{"current_steps": 6570, "total_steps": 28254, "loss": 0.7227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.362616217114599e-05, "epoch": 0.47, "percentage": 23.25, "elapsed_time": "11:21:49", "remaining_time": "1 day, 13:30:18"} +{"current_steps": 6580, "total_steps": 28254, "loss": 0.7899, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.360760920023839e-05, "epoch": 0.47, "percentage": 23.29, "elapsed_time": "11:22:50", "remaining_time": "1 day, 13:29:13"} +{"current_steps": 6590, "total_steps": 28254, "loss": 0.7411, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3589033223898654e-05, "epoch": 0.47, "percentage": 23.32, "elapsed_time": "11:23:55", "remaining_time": "1 day, 13:28:18"} +{"current_steps": 6600, "total_steps": 28254, "loss": 0.7544, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.357043426509312e-05, "epoch": 0.47, "percentage": 23.36, "elapsed_time": "11:24:56", "remaining_time": "1 day, 13:27:13"} +{"current_steps": 6610, "total_steps": 28254, "loss": 0.7661, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3551812346816514e-05, "epoch": 0.47, "percentage": 23.39, "elapsed_time": "11:25:58", "remaining_time": "1 day, 13:26:09"} +{"current_steps": 6620, "total_steps": 28254, "loss": 0.7741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3533167492091965e-05, "epoch": 0.47, "percentage": 23.43, "elapsed_time": "11:27:01", "remaining_time": "1 day, 13:25:09"} +{"current_steps": 6630, "total_steps": 28254, "loss": 0.7939, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.351449972397095e-05, "epoch": 0.47, "percentage": 23.47, "elapsed_time": "11:28:04", "remaining_time": "1 day, 13:24:09"} +{"current_steps": 6640, "total_steps": 28254, "loss": 0.7487, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3495809065533275e-05, "epoch": 0.47, "percentage": 23.5, "elapsed_time": "11:29:08", "remaining_time": "1 day, 13:23:15"} +{"current_steps": 6650, "total_steps": 28254, "loss": 0.7369, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.347709553988707e-05, "epoch": 0.47, "percentage": 23.54, "elapsed_time": "11:30:09", "remaining_time": "1 day, 13:22:08"} +{"current_steps": 6660, "total_steps": 28254, "loss": 0.74, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.345835917016869e-05, "epoch": 0.47, "percentage": 23.57, "elapsed_time": "11:31:12", "remaining_time": "1 day, 13:21:07"} +{"current_steps": 6670, "total_steps": 28254, "loss": 0.7471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3439599979542775e-05, "epoch": 0.47, "percentage": 23.61, "elapsed_time": "11:32:13", "remaining_time": "1 day, 13:20:01"} +{"current_steps": 6680, "total_steps": 28254, "loss": 0.7852, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.342081799120216e-05, "epoch": 0.47, "percentage": 23.64, "elapsed_time": "11:33:14", "remaining_time": "1 day, 13:18:54"} +{"current_steps": 6690, "total_steps": 28254, "loss": 0.7979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3402013228367866e-05, "epoch": 0.47, "percentage": 23.68, "elapsed_time": "11:34:17", "remaining_time": "1 day, 13:17:55"} +{"current_steps": 6700, "total_steps": 28254, "loss": 0.766, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3383185714289075e-05, "epoch": 0.47, "percentage": 23.71, "elapsed_time": "11:35:18", "remaining_time": "1 day, 13:16:50"} +{"current_steps": 6710, "total_steps": 28254, "loss": 0.7547, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.336433547224311e-05, "epoch": 0.47, "percentage": 23.75, "elapsed_time": "11:36:22", "remaining_time": "1 day, 13:15:51"} +{"current_steps": 6720, "total_steps": 28254, "loss": 0.7385, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.334546252553537e-05, "epoch": 0.48, "percentage": 23.78, "elapsed_time": "11:37:25", "remaining_time": "1 day, 13:14:51"} +{"current_steps": 6730, "total_steps": 28254, "loss": 0.7328, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.332656689749933e-05, "epoch": 0.48, "percentage": 23.82, "elapsed_time": "11:38:27", "remaining_time": "1 day, 13:13:50"} +{"current_steps": 6740, "total_steps": 28254, "loss": 0.8058, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3307648611496534e-05, "epoch": 0.48, "percentage": 23.86, "elapsed_time": "11:39:30", "remaining_time": "1 day, 13:12:48"} +{"current_steps": 6750, "total_steps": 28254, "loss": 0.7683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.32887076909165e-05, "epoch": 0.48, "percentage": 23.89, "elapsed_time": "11:40:33", "remaining_time": "1 day, 13:11:49"} +{"current_steps": 6760, "total_steps": 28254, "loss": 0.772, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.326974415917675e-05, "epoch": 0.48, "percentage": 23.93, "elapsed_time": "11:41:35", "remaining_time": "1 day, 13:10:46"} +{"current_steps": 6770, "total_steps": 28254, "loss": 0.769, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.325075803972277e-05, "epoch": 0.48, "percentage": 23.96, "elapsed_time": "11:42:36", "remaining_time": "1 day, 13:09:39"} +{"current_steps": 6780, "total_steps": 28254, "loss": 0.7472, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3231749356027953e-05, "epoch": 0.48, "percentage": 24.0, "elapsed_time": "11:43:39", "remaining_time": "1 day, 13:08:40"} +{"current_steps": 6790, "total_steps": 28254, "loss": 0.7345, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.32127181315936e-05, "epoch": 0.48, "percentage": 24.03, "elapsed_time": "11:44:42", "remaining_time": "1 day, 13:07:39"} +{"current_steps": 6800, "total_steps": 28254, "loss": 0.753, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.319366438994887e-05, "epoch": 0.48, "percentage": 24.07, "elapsed_time": "11:45:45", "remaining_time": "1 day, 13:06:38"} +{"current_steps": 6810, "total_steps": 28254, "loss": 0.7583, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3174588154650786e-05, "epoch": 0.48, "percentage": 24.1, "elapsed_time": "11:46:48", "remaining_time": "1 day, 13:05:39"} +{"current_steps": 6820, "total_steps": 28254, "loss": 0.758, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3155489449284145e-05, "epoch": 0.48, "percentage": 24.14, "elapsed_time": "11:47:50", "remaining_time": "1 day, 13:04:35"} +{"current_steps": 6830, "total_steps": 28254, "loss": 0.7883, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.313636829746155e-05, "epoch": 0.48, "percentage": 24.17, "elapsed_time": "11:48:53", "remaining_time": "1 day, 13:03:38"} +{"current_steps": 6840, "total_steps": 28254, "loss": 0.7471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.311722472282336e-05, "epoch": 0.48, "percentage": 24.21, "elapsed_time": "11:49:56", "remaining_time": "1 day, 13:02:37"} +{"current_steps": 6850, "total_steps": 28254, "loss": 0.7488, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.309805874903764e-05, "epoch": 0.48, "percentage": 24.24, "elapsed_time": "11:50:59", "remaining_time": "1 day, 13:01:35"} +{"current_steps": 6860, "total_steps": 28254, "loss": 0.7445, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.307887039980014e-05, "epoch": 0.49, "percentage": 24.28, "elapsed_time": "11:52:02", "remaining_time": "1 day, 13:00:36"} +{"current_steps": 6870, "total_steps": 28254, "loss": 0.7558, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.30596596988343e-05, "epoch": 0.49, "percentage": 24.32, "elapsed_time": "11:53:04", "remaining_time": "1 day, 12:59:35"} +{"current_steps": 6880, "total_steps": 28254, "loss": 0.7653, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3040426669891185e-05, "epoch": 0.49, "percentage": 24.35, "elapsed_time": "11:54:08", "remaining_time": "1 day, 12:58:36"} +{"current_steps": 6890, "total_steps": 28254, "loss": 0.7492, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3021171336749456e-05, "epoch": 0.49, "percentage": 24.39, "elapsed_time": "11:55:10", "remaining_time": "1 day, 12:57:34"} +{"current_steps": 6900, "total_steps": 28254, "loss": 0.7834, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3001893723215345e-05, "epoch": 0.49, "percentage": 24.42, "elapsed_time": "11:56:14", "remaining_time": "1 day, 12:56:37"} +{"current_steps": 6910, "total_steps": 28254, "loss": 0.7641, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2982593853122665e-05, "epoch": 0.49, "percentage": 24.46, "elapsed_time": "11:57:19", "remaining_time": "1 day, 12:55:41"} +{"current_steps": 6920, "total_steps": 28254, "loss": 0.7951, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2963271750332715e-05, "epoch": 0.49, "percentage": 24.49, "elapsed_time": "11:58:21", "remaining_time": "1 day, 12:54:38"} +{"current_steps": 6930, "total_steps": 28254, "loss": 0.7493, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.294392743873427e-05, "epoch": 0.49, "percentage": 24.53, "elapsed_time": "11:59:23", "remaining_time": "1 day, 12:53:37"} +{"current_steps": 6940, "total_steps": 28254, "loss": 0.7314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2924560942243594e-05, "epoch": 0.49, "percentage": 24.56, "elapsed_time": "12:00:26", "remaining_time": "1 day, 12:52:35"} +{"current_steps": 6950, "total_steps": 28254, "loss": 0.7427, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2905172284804366e-05, "epoch": 0.49, "percentage": 24.6, "elapsed_time": "12:01:28", "remaining_time": "1 day, 12:51:33"} +{"current_steps": 6960, "total_steps": 28254, "loss": 0.7733, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.288576149038767e-05, "epoch": 0.49, "percentage": 24.63, "elapsed_time": "12:02:32", "remaining_time": "1 day, 12:50:35"} +{"current_steps": 6970, "total_steps": 28254, "loss": 0.717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.286632858299193e-05, "epoch": 0.49, "percentage": 24.67, "elapsed_time": "12:03:34", "remaining_time": "1 day, 12:49:34"} +{"current_steps": 6980, "total_steps": 28254, "loss": 0.7715, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.284687358664296e-05, "epoch": 0.49, "percentage": 24.7, "elapsed_time": "12:04:37", "remaining_time": "1 day, 12:48:32"} +{"current_steps": 6990, "total_steps": 28254, "loss": 0.7389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2827396525393834e-05, "epoch": 0.49, "percentage": 24.74, "elapsed_time": "12:05:40", "remaining_time": "1 day, 12:47:32"} +{"current_steps": 7000, "total_steps": 28254, "loss": 0.7324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.280789742332494e-05, "epoch": 0.5, "percentage": 24.78, "elapsed_time": "12:06:42", "remaining_time": "1 day, 12:46:30"} +{"current_steps": 7010, "total_steps": 28254, "loss": 0.7295, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.27883763045439e-05, "epoch": 0.5, "percentage": 24.81, "elapsed_time": "12:07:45", "remaining_time": "1 day, 12:45:30"} +{"current_steps": 7020, "total_steps": 28254, "loss": 0.7567, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2768833193185555e-05, "epoch": 0.5, "percentage": 24.85, "elapsed_time": "12:08:48", "remaining_time": "1 day, 12:44:29"} +{"current_steps": 7030, "total_steps": 28254, "loss": 0.7474, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2749268113411945e-05, "epoch": 0.5, "percentage": 24.88, "elapsed_time": "12:09:49", "remaining_time": "1 day, 12:43:24"} +{"current_steps": 7040, "total_steps": 28254, "loss": 0.7627, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.272968108941226e-05, "epoch": 0.5, "percentage": 24.92, "elapsed_time": "12:10:52", "remaining_time": "1 day, 12:42:22"} +{"current_steps": 7050, "total_steps": 28254, "loss": 0.7624, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2710072145402834e-05, "epoch": 0.5, "percentage": 24.95, "elapsed_time": "12:11:53", "remaining_time": "1 day, 12:41:15"} +{"current_steps": 7060, "total_steps": 28254, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.269044130562709e-05, "epoch": 0.5, "percentage": 24.99, "elapsed_time": "12:12:55", "remaining_time": "1 day, 12:40:12"} +{"current_steps": 7070, "total_steps": 28254, "loss": 0.7312, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.267078859435554e-05, "epoch": 0.5, "percentage": 25.02, "elapsed_time": "12:13:57", "remaining_time": "1 day, 12:39:09"} +{"current_steps": 7080, "total_steps": 28254, "loss": 0.728, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.265111403588571e-05, "epoch": 0.5, "percentage": 25.06, "elapsed_time": "12:14:57", "remaining_time": "1 day, 12:38:00"} +{"current_steps": 7090, "total_steps": 28254, "loss": 0.7289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.263141765454215e-05, "epoch": 0.5, "percentage": 25.09, "elapsed_time": "12:15:59", "remaining_time": "1 day, 12:36:58"} +{"current_steps": 7100, "total_steps": 28254, "loss": 0.7292, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.261169947467639e-05, "epoch": 0.5, "percentage": 25.13, "elapsed_time": "12:17:01", "remaining_time": "1 day, 12:35:54"} +{"current_steps": 7110, "total_steps": 28254, "loss": 0.745, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.259195952066693e-05, "epoch": 0.5, "percentage": 25.16, "elapsed_time": "12:18:02", "remaining_time": "1 day, 12:34:49"} +{"current_steps": 7120, "total_steps": 28254, "loss": 0.7376, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.257219781691914e-05, "epoch": 0.5, "percentage": 25.2, "elapsed_time": "12:19:02", "remaining_time": "1 day, 12:33:39"} +{"current_steps": 7130, "total_steps": 28254, "loss": 0.7655, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.255241438786533e-05, "epoch": 0.5, "percentage": 25.24, "elapsed_time": "12:20:05", "remaining_time": "1 day, 12:32:39"} +{"current_steps": 7140, "total_steps": 28254, "loss": 0.7414, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.253260925796465e-05, "epoch": 0.51, "percentage": 25.27, "elapsed_time": "12:21:04", "remaining_time": "1 day, 12:31:29"} +{"current_steps": 7150, "total_steps": 28254, "loss": 0.7371, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.251278245170308e-05, "epoch": 0.51, "percentage": 25.31, "elapsed_time": "12:22:08", "remaining_time": "1 day, 12:30:29"} +{"current_steps": 7160, "total_steps": 28254, "loss": 0.7798, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.249293399359341e-05, "epoch": 0.51, "percentage": 25.34, "elapsed_time": "12:23:10", "remaining_time": "1 day, 12:29:27"} +{"current_steps": 7170, "total_steps": 28254, "loss": 0.7531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.247306390817518e-05, "epoch": 0.51, "percentage": 25.38, "elapsed_time": "12:24:11", "remaining_time": "1 day, 12:28:21"} +{"current_steps": 7180, "total_steps": 28254, "loss": 0.7621, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.245317222001467e-05, "epoch": 0.51, "percentage": 25.41, "elapsed_time": "12:25:14", "remaining_time": "1 day, 12:27:22"} +{"current_steps": 7190, "total_steps": 28254, "loss": 0.7582, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.243325895370489e-05, "epoch": 0.51, "percentage": 25.45, "elapsed_time": "12:26:18", "remaining_time": "1 day, 12:26:24"} +{"current_steps": 7200, "total_steps": 28254, "loss": 0.7491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2413324133865516e-05, "epoch": 0.51, "percentage": 25.48, "elapsed_time": "12:27:18", "remaining_time": "1 day, 12:25:16"} +{"current_steps": 7210, "total_steps": 28254, "loss": 0.7751, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.239336778514287e-05, "epoch": 0.51, "percentage": 25.52, "elapsed_time": "12:28:22", "remaining_time": "1 day, 12:24:17"} +{"current_steps": 7220, "total_steps": 28254, "loss": 0.7497, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.237338993220988e-05, "epoch": 0.51, "percentage": 25.55, "elapsed_time": "12:29:24", "remaining_time": "1 day, 12:23:15"} +{"current_steps": 7230, "total_steps": 28254, "loss": 0.7692, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.23533905997661e-05, "epoch": 0.51, "percentage": 25.59, "elapsed_time": "12:30:28", "remaining_time": "1 day, 12:22:17"} +{"current_steps": 7240, "total_steps": 28254, "loss": 0.7796, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2333369812537583e-05, "epoch": 0.51, "percentage": 25.62, "elapsed_time": "12:31:31", "remaining_time": "1 day, 12:21:18"} +{"current_steps": 7250, "total_steps": 28254, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.231332759527695e-05, "epoch": 0.51, "percentage": 25.66, "elapsed_time": "12:32:34", "remaining_time": "1 day, 12:20:16"} +{"current_steps": 7260, "total_steps": 28254, "loss": 0.7472, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2293263972763295e-05, "epoch": 0.51, "percentage": 25.7, "elapsed_time": "12:33:36", "remaining_time": "1 day, 12:19:14"} +{"current_steps": 7270, "total_steps": 28254, "loss": 0.7488, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.227317896980221e-05, "epoch": 0.51, "percentage": 25.73, "elapsed_time": "12:34:38", "remaining_time": "1 day, 12:18:11"} +{"current_steps": 7280, "total_steps": 28254, "loss": 0.7418, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.225307261122568e-05, "epoch": 0.52, "percentage": 25.77, "elapsed_time": "12:35:43", "remaining_time": "1 day, 12:17:17"} +{"current_steps": 7290, "total_steps": 28254, "loss": 0.7462, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.223294492189209e-05, "epoch": 0.52, "percentage": 25.8, "elapsed_time": "12:36:44", "remaining_time": "1 day, 12:16:10"} +{"current_steps": 7300, "total_steps": 28254, "loss": 0.7761, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2212795926686255e-05, "epoch": 0.52, "percentage": 25.84, "elapsed_time": "12:37:45", "remaining_time": "1 day, 12:15:04"} +{"current_steps": 7310, "total_steps": 28254, "loss": 0.7454, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2192625650519265e-05, "epoch": 0.52, "percentage": 25.87, "elapsed_time": "12:38:47", "remaining_time": "1 day, 12:14:00"} +{"current_steps": 7320, "total_steps": 28254, "loss": 0.7579, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.217243411832856e-05, "epoch": 0.52, "percentage": 25.91, "elapsed_time": "12:39:50", "remaining_time": "1 day, 12:13:01"} +{"current_steps": 7330, "total_steps": 28254, "loss": 0.773, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.215222135507784e-05, "epoch": 0.52, "percentage": 25.94, "elapsed_time": "12:40:52", "remaining_time": "1 day, 12:11:59"} +{"current_steps": 7340, "total_steps": 28254, "loss": 0.7655, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2131987385757066e-05, "epoch": 0.52, "percentage": 25.98, "elapsed_time": "12:41:52", "remaining_time": "1 day, 12:10:50"} +{"current_steps": 7350, "total_steps": 28254, "loss": 0.7359, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.211173223538242e-05, "epoch": 0.52, "percentage": 26.01, "elapsed_time": "12:42:56", "remaining_time": "1 day, 12:09:51"} +{"current_steps": 7360, "total_steps": 28254, "loss": 0.7741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.209145592899625e-05, "epoch": 0.52, "percentage": 26.05, "elapsed_time": "12:44:00", "remaining_time": "1 day, 12:08:55"} +{"current_steps": 7370, "total_steps": 28254, "loss": 0.7681, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.207115849166709e-05, "epoch": 0.52, "percentage": 26.08, "elapsed_time": "12:45:01", "remaining_time": "1 day, 12:07:49"} +{"current_steps": 7380, "total_steps": 28254, "loss": 0.7548, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2050839948489565e-05, "epoch": 0.52, "percentage": 26.12, "elapsed_time": "12:46:06", "remaining_time": "1 day, 12:06:52"} +{"current_steps": 7390, "total_steps": 28254, "loss": 0.7798, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.203050032458443e-05, "epoch": 0.52, "percentage": 26.16, "elapsed_time": "12:47:07", "remaining_time": "1 day, 12:05:49"} +{"current_steps": 7400, "total_steps": 28254, "loss": 0.7405, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2010139645098476e-05, "epoch": 0.52, "percentage": 26.19, "elapsed_time": "12:48:09", "remaining_time": "1 day, 12:04:46"} +{"current_steps": 7410, "total_steps": 28254, "loss": 0.7491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1989757935204535e-05, "epoch": 0.52, "percentage": 26.23, "elapsed_time": "12:49:11", "remaining_time": "1 day, 12:03:43"} +{"current_steps": 7420, "total_steps": 28254, "loss": 0.7777, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1969355220101446e-05, "epoch": 0.53, "percentage": 26.26, "elapsed_time": "12:50:14", "remaining_time": "1 day, 12:02:40"} +{"current_steps": 7430, "total_steps": 28254, "loss": 0.7521, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.194893152501401e-05, "epoch": 0.53, "percentage": 26.3, "elapsed_time": "12:51:15", "remaining_time": "1 day, 12:01:37"} +{"current_steps": 7440, "total_steps": 28254, "loss": 0.7891, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.192848687519296e-05, "epoch": 0.53, "percentage": 26.33, "elapsed_time": "12:52:18", "remaining_time": "1 day, 12:00:36"} +{"current_steps": 7450, "total_steps": 28254, "loss": 0.768, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.190802129591496e-05, "epoch": 0.53, "percentage": 26.37, "elapsed_time": "12:53:21", "remaining_time": "1 day, 11:59:35"} +{"current_steps": 7460, "total_steps": 28254, "loss": 0.7514, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.188753481248253e-05, "epoch": 0.53, "percentage": 26.4, "elapsed_time": "12:54:23", "remaining_time": "1 day, 11:58:33"} +{"current_steps": 7470, "total_steps": 28254, "loss": 0.7322, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.186702745022403e-05, "epoch": 0.53, "percentage": 26.44, "elapsed_time": "12:55:26", "remaining_time": "1 day, 11:57:32"} +{"current_steps": 7480, "total_steps": 28254, "loss": 0.7411, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1846499234493655e-05, "epoch": 0.53, "percentage": 26.47, "elapsed_time": "12:56:28", "remaining_time": "1 day, 11:56:28"} +{"current_steps": 7490, "total_steps": 28254, "loss": 0.743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.182595019067136e-05, "epoch": 0.53, "percentage": 26.51, "elapsed_time": "12:57:30", "remaining_time": "1 day, 11:55:25"} +{"current_steps": 7500, "total_steps": 28254, "loss": 0.7602, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.180538034416287e-05, "epoch": 0.53, "percentage": 26.54, "elapsed_time": "12:58:31", "remaining_time": "1 day, 11:54:20"} +{"current_steps": 7510, "total_steps": 28254, "loss": 0.7293, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.178478972039961e-05, "epoch": 0.53, "percentage": 26.58, "elapsed_time": "12:59:33", "remaining_time": "1 day, 11:53:17"} +{"current_steps": 7520, "total_steps": 28254, "loss": 0.763, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1764178344838716e-05, "epoch": 0.53, "percentage": 26.62, "elapsed_time": "13:00:36", "remaining_time": "1 day, 11:52:17"} +{"current_steps": 7530, "total_steps": 28254, "loss": 0.7368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.174354624296296e-05, "epoch": 0.53, "percentage": 26.65, "elapsed_time": "13:01:41", "remaining_time": "1 day, 11:51:22"} +{"current_steps": 7540, "total_steps": 28254, "loss": 0.7689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.172289344028075e-05, "epoch": 0.53, "percentage": 26.69, "elapsed_time": "13:02:42", "remaining_time": "1 day, 11:50:14"} +{"current_steps": 7550, "total_steps": 28254, "loss": 0.79, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.170221996232607e-05, "epoch": 0.53, "percentage": 26.72, "elapsed_time": "13:03:41", "remaining_time": "1 day, 11:49:04"} +{"current_steps": 7560, "total_steps": 28254, "loss": 0.7563, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.16815258346585e-05, "epoch": 0.54, "percentage": 26.76, "elapsed_time": "13:04:44", "remaining_time": "1 day, 11:48:05"} +{"current_steps": 7570, "total_steps": 28254, "loss": 0.7594, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1660811082863115e-05, "epoch": 0.54, "percentage": 26.79, "elapsed_time": "13:05:49", "remaining_time": "1 day, 11:47:09"} +{"current_steps": 7580, "total_steps": 28254, "loss": 0.7512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.164007573255052e-05, "epoch": 0.54, "percentage": 26.83, "elapsed_time": "13:06:52", "remaining_time": "1 day, 11:46:09"} +{"current_steps": 7590, "total_steps": 28254, "loss": 0.7693, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.161931980935675e-05, "epoch": 0.54, "percentage": 26.86, "elapsed_time": "13:07:54", "remaining_time": "1 day, 11:45:05"} +{"current_steps": 7600, "total_steps": 28254, "loss": 0.7577, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.15985433389433e-05, "epoch": 0.54, "percentage": 26.9, "elapsed_time": "13:08:55", "remaining_time": "1 day, 11:44:01"} +{"current_steps": 7610, "total_steps": 28254, "loss": 0.7549, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.157774634699707e-05, "epoch": 0.54, "percentage": 26.93, "elapsed_time": "13:09:58", "remaining_time": "1 day, 11:42:59"} +{"current_steps": 7620, "total_steps": 28254, "loss": 0.7464, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.155692885923033e-05, "epoch": 0.54, "percentage": 26.97, "elapsed_time": "13:11:01", "remaining_time": "1 day, 11:41:59"} +{"current_steps": 7630, "total_steps": 28254, "loss": 0.7663, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1536090901380664e-05, "epoch": 0.54, "percentage": 27.01, "elapsed_time": "13:12:06", "remaining_time": "1 day, 11:41:05"} +{"current_steps": 7640, "total_steps": 28254, "loss": 0.7683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.151523249921101e-05, "epoch": 0.54, "percentage": 27.04, "elapsed_time": "13:13:09", "remaining_time": "1 day, 11:40:03"} +{"current_steps": 7650, "total_steps": 28254, "loss": 0.7438, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.149435367850955e-05, "epoch": 0.54, "percentage": 27.08, "elapsed_time": "13:14:11", "remaining_time": "1 day, 11:39:01"} +{"current_steps": 7660, "total_steps": 28254, "loss": 0.7332, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.14734544650897e-05, "epoch": 0.54, "percentage": 27.11, "elapsed_time": "13:15:11", "remaining_time": "1 day, 11:37:52"} +{"current_steps": 7670, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.145253488479013e-05, "epoch": 0.54, "percentage": 27.15, "elapsed_time": "13:16:14", "remaining_time": "1 day, 11:36:52"} +{"current_steps": 7680, "total_steps": 28254, "loss": 0.7398, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.143159496347466e-05, "epoch": 0.54, "percentage": 27.18, "elapsed_time": "13:17:16", "remaining_time": "1 day, 11:35:50"} +{"current_steps": 7690, "total_steps": 28254, "loss": 0.784, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1410634727032264e-05, "epoch": 0.54, "percentage": 27.22, "elapsed_time": "13:18:19", "remaining_time": "1 day, 11:34:49"} +{"current_steps": 7700, "total_steps": 28254, "loss": 0.7534, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.138965420137704e-05, "epoch": 0.55, "percentage": 27.25, "elapsed_time": "13:19:23", "remaining_time": "1 day, 11:33:50"} +{"current_steps": 7710, "total_steps": 28254, "loss": 0.746, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.136865341244815e-05, "epoch": 0.55, "percentage": 27.29, "elapsed_time": "13:20:25", "remaining_time": "1 day, 11:32:49"} +{"current_steps": 7720, "total_steps": 28254, "loss": 0.7369, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1347632386209834e-05, "epoch": 0.55, "percentage": 27.32, "elapsed_time": "13:21:27", "remaining_time": "1 day, 11:31:44"} +{"current_steps": 7730, "total_steps": 28254, "loss": 0.7417, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.132659114865134e-05, "epoch": 0.55, "percentage": 27.36, "elapsed_time": "13:22:29", "remaining_time": "1 day, 11:30:42"} +{"current_steps": 7740, "total_steps": 28254, "loss": 0.7658, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.13055297257869e-05, "epoch": 0.55, "percentage": 27.39, "elapsed_time": "13:23:34", "remaining_time": "1 day, 11:29:46"} +{"current_steps": 7750, "total_steps": 28254, "loss": 0.7414, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1284448143655716e-05, "epoch": 0.55, "percentage": 27.43, "elapsed_time": "13:24:36", "remaining_time": "1 day, 11:28:44"} +{"current_steps": 7760, "total_steps": 28254, "loss": 0.7202, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.126334642832189e-05, "epoch": 0.55, "percentage": 27.47, "elapsed_time": "13:25:38", "remaining_time": "1 day, 11:27:41"} +{"current_steps": 7770, "total_steps": 28254, "loss": 0.7547, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1242224605874456e-05, "epoch": 0.55, "percentage": 27.5, "elapsed_time": "13:26:43", "remaining_time": "1 day, 11:26:44"} +{"current_steps": 7780, "total_steps": 28254, "loss": 0.7254, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.122108270242726e-05, "epoch": 0.55, "percentage": 27.54, "elapsed_time": "13:27:45", "remaining_time": "1 day, 11:25:42"} +{"current_steps": 7790, "total_steps": 28254, "loss": 0.7217, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.119992074411901e-05, "epoch": 0.55, "percentage": 27.57, "elapsed_time": "13:28:46", "remaining_time": "1 day, 11:24:38"} +{"current_steps": 7800, "total_steps": 28254, "loss": 0.7806, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1178738757113186e-05, "epoch": 0.55, "percentage": 27.61, "elapsed_time": "13:29:49", "remaining_time": "1 day, 11:23:37"} +{"current_steps": 7810, "total_steps": 28254, "loss": 0.7418, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.115753676759805e-05, "epoch": 0.55, "percentage": 27.64, "elapsed_time": "13:30:50", "remaining_time": "1 day, 11:22:29"} +{"current_steps": 7820, "total_steps": 28254, "loss": 0.7323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.113631480178657e-05, "epoch": 0.55, "percentage": 27.68, "elapsed_time": "13:31:53", "remaining_time": "1 day, 11:21:29"} +{"current_steps": 7830, "total_steps": 28254, "loss": 0.7351, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.111507288591645e-05, "epoch": 0.55, "percentage": 27.71, "elapsed_time": "13:32:55", "remaining_time": "1 day, 11:20:27"} +{"current_steps": 7840, "total_steps": 28254, "loss": 0.7437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.109381104625001e-05, "epoch": 0.55, "percentage": 27.75, "elapsed_time": "13:33:57", "remaining_time": "1 day, 11:19:23"} +{"current_steps": 7850, "total_steps": 28254, "loss": 0.7061, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1072529309074235e-05, "epoch": 0.56, "percentage": 27.78, "elapsed_time": "13:34:57", "remaining_time": "1 day, 11:18:15"} +{"current_steps": 7860, "total_steps": 28254, "loss": 0.7358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.105122770070071e-05, "epoch": 0.56, "percentage": 27.82, "elapsed_time": "13:36:00", "remaining_time": "1 day, 11:17:16"} +{"current_steps": 7870, "total_steps": 28254, "loss": 0.7275, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1029906247465576e-05, "epoch": 0.56, "percentage": 27.85, "elapsed_time": "13:37:02", "remaining_time": "1 day, 11:16:11"} +{"current_steps": 7880, "total_steps": 28254, "loss": 0.8013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1008564975729514e-05, "epoch": 0.56, "percentage": 27.89, "elapsed_time": "13:38:05", "remaining_time": "1 day, 11:15:12"} +{"current_steps": 7890, "total_steps": 28254, "loss": 0.7475, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.098720391187771e-05, "epoch": 0.56, "percentage": 27.93, "elapsed_time": "13:39:07", "remaining_time": "1 day, 11:14:08"} +{"current_steps": 7900, "total_steps": 28254, "loss": 0.7264, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.096582308231981e-05, "epoch": 0.56, "percentage": 27.96, "elapsed_time": "13:40:09", "remaining_time": "1 day, 11:13:05"} +{"current_steps": 7910, "total_steps": 28254, "loss": 0.7853, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.094442251348991e-05, "epoch": 0.56, "percentage": 28.0, "elapsed_time": "13:41:12", "remaining_time": "1 day, 11:12:06"} +{"current_steps": 7920, "total_steps": 28254, "loss": 0.7747, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.092300223184651e-05, "epoch": 0.56, "percentage": 28.03, "elapsed_time": "13:42:13", "remaining_time": "1 day, 11:11:01"} +{"current_steps": 7930, "total_steps": 28254, "loss": 0.7651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0901562263872465e-05, "epoch": 0.56, "percentage": 28.07, "elapsed_time": "13:43:17", "remaining_time": "1 day, 11:10:01"} +{"current_steps": 7940, "total_steps": 28254, "loss": 0.7529, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.088010263607499e-05, "epoch": 0.56, "percentage": 28.1, "elapsed_time": "13:44:19", "remaining_time": "1 day, 11:08:59"} +{"current_steps": 7950, "total_steps": 28254, "loss": 0.7526, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.08586233749856e-05, "epoch": 0.56, "percentage": 28.14, "elapsed_time": "13:45:22", "remaining_time": "1 day, 11:07:57"} +{"current_steps": 7960, "total_steps": 28254, "loss": 0.7322, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0837124507160064e-05, "epoch": 0.56, "percentage": 28.17, "elapsed_time": "13:46:22", "remaining_time": "1 day, 11:06:51"} +{"current_steps": 7970, "total_steps": 28254, "loss": 0.757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0815606059178423e-05, "epoch": 0.56, "percentage": 28.21, "elapsed_time": "13:47:25", "remaining_time": "1 day, 11:05:48"} +{"current_steps": 7980, "total_steps": 28254, "loss": 0.7799, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0794068057644904e-05, "epoch": 0.56, "percentage": 28.24, "elapsed_time": "13:48:27", "remaining_time": "1 day, 11:04:46"} +{"current_steps": 7990, "total_steps": 28254, "loss": 0.7197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0772510529187924e-05, "epoch": 0.57, "percentage": 28.28, "elapsed_time": "13:49:29", "remaining_time": "1 day, 11:03:43"} +{"current_steps": 8000, "total_steps": 28254, "loss": 0.7224, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0750933500460025e-05, "epoch": 0.57, "percentage": 28.31, "elapsed_time": "13:50:31", "remaining_time": "1 day, 11:02:41"} +{"current_steps": 8010, "total_steps": 28254, "loss": 0.7208, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.072933699813788e-05, "epoch": 0.57, "percentage": 28.35, "elapsed_time": "13:51:36", "remaining_time": "1 day, 11:01:44"} +{"current_steps": 8020, "total_steps": 28254, "loss": 0.7544, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.070772104892221e-05, "epoch": 0.57, "percentage": 28.39, "elapsed_time": "13:52:38", "remaining_time": "1 day, 11:00:43"} +{"current_steps": 8030, "total_steps": 28254, "loss": 0.7631, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.068608567953781e-05, "epoch": 0.57, "percentage": 28.42, "elapsed_time": "13:53:41", "remaining_time": "1 day, 10:59:42"} +{"current_steps": 8040, "total_steps": 28254, "loss": 0.7584, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.066443091673345e-05, "epoch": 0.57, "percentage": 28.46, "elapsed_time": "13:54:45", "remaining_time": "1 day, 10:58:42"} +{"current_steps": 8050, "total_steps": 28254, "loss": 0.7454, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.064275678728191e-05, "epoch": 0.57, "percentage": 28.49, "elapsed_time": "13:55:47", "remaining_time": "1 day, 10:57:39"} +{"current_steps": 8060, "total_steps": 28254, "loss": 0.7882, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0621063317979904e-05, "epoch": 0.57, "percentage": 28.53, "elapsed_time": "13:56:48", "remaining_time": "1 day, 10:56:35"} +{"current_steps": 8070, "total_steps": 28254, "loss": 0.7521, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.059935053564805e-05, "epoch": 0.57, "percentage": 28.56, "elapsed_time": "13:57:48", "remaining_time": "1 day, 10:55:27"} +{"current_steps": 8080, "total_steps": 28254, "loss": 0.7452, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.057761846713084e-05, "epoch": 0.57, "percentage": 28.6, "elapsed_time": "13:58:52", "remaining_time": "1 day, 10:54:29"} +{"current_steps": 8090, "total_steps": 28254, "loss": 0.7729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.055586713929662e-05, "epoch": 0.57, "percentage": 28.63, "elapsed_time": "13:59:54", "remaining_time": "1 day, 10:53:27"} +{"current_steps": 8100, "total_steps": 28254, "loss": 0.7471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.053409657903755e-05, "epoch": 0.57, "percentage": 28.67, "elapsed_time": "14:00:55", "remaining_time": "1 day, 10:52:21"} +{"current_steps": 8110, "total_steps": 28254, "loss": 0.7553, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0512306813269555e-05, "epoch": 0.57, "percentage": 28.7, "elapsed_time": "14:01:56", "remaining_time": "1 day, 10:51:16"} +{"current_steps": 8120, "total_steps": 28254, "loss": 0.7342, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0490497868932306e-05, "epoch": 0.57, "percentage": 28.74, "elapsed_time": "14:02:58", "remaining_time": "1 day, 10:50:12"} +{"current_steps": 8130, "total_steps": 28254, "loss": 0.7419, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.046866977298921e-05, "epoch": 0.58, "percentage": 28.77, "elapsed_time": "14:04:02", "remaining_time": "1 day, 10:49:14"} +{"current_steps": 8140, "total_steps": 28254, "loss": 0.7688, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.044682255242732e-05, "epoch": 0.58, "percentage": 28.81, "elapsed_time": "14:05:06", "remaining_time": "1 day, 10:48:16"} +{"current_steps": 8150, "total_steps": 28254, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.042495623425735e-05, "epoch": 0.58, "percentage": 28.85, "elapsed_time": "14:06:07", "remaining_time": "1 day, 10:47:10"} +{"current_steps": 8160, "total_steps": 28254, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.040307084551362e-05, "epoch": 0.58, "percentage": 28.88, "elapsed_time": "14:07:10", "remaining_time": "1 day, 10:46:11"} +{"current_steps": 8170, "total_steps": 28254, "loss": 0.7233, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.038116641325403e-05, "epoch": 0.58, "percentage": 28.92, "elapsed_time": "14:08:12", "remaining_time": "1 day, 10:45:06"} +{"current_steps": 8180, "total_steps": 28254, "loss": 0.7869, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.035924296456003e-05, "epoch": 0.58, "percentage": 28.95, "elapsed_time": "14:09:12", "remaining_time": "1 day, 10:44:00"} +{"current_steps": 8190, "total_steps": 28254, "loss": 0.7391, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.033730052653656e-05, "epoch": 0.58, "percentage": 28.99, "elapsed_time": "14:10:15", "remaining_time": "1 day, 10:42:59"} +{"current_steps": 8200, "total_steps": 28254, "loss": 0.7531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.031533912631207e-05, "epoch": 0.58, "percentage": 29.02, "elapsed_time": "14:11:19", "remaining_time": "1 day, 10:42:00"} +{"current_steps": 8210, "total_steps": 28254, "loss": 0.7616, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0293358791038426e-05, "epoch": 0.58, "percentage": 29.06, "elapsed_time": "14:12:21", "remaining_time": "1 day, 10:40:56"} +{"current_steps": 8220, "total_steps": 28254, "loss": 0.7474, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.027135954789093e-05, "epoch": 0.58, "percentage": 29.09, "elapsed_time": "14:13:24", "remaining_time": "1 day, 10:39:57"} +{"current_steps": 8230, "total_steps": 28254, "loss": 0.7436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.024934142406822e-05, "epoch": 0.58, "percentage": 29.13, "elapsed_time": "14:14:25", "remaining_time": "1 day, 10:38:52"} +{"current_steps": 8240, "total_steps": 28254, "loss": 0.7671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0227304446792313e-05, "epoch": 0.58, "percentage": 29.16, "elapsed_time": "14:15:27", "remaining_time": "1 day, 10:37:49"} +{"current_steps": 8250, "total_steps": 28254, "loss": 0.7358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.020524864330854e-05, "epoch": 0.58, "percentage": 29.2, "elapsed_time": "14:16:31", "remaining_time": "1 day, 10:36:50"} +{"current_steps": 8260, "total_steps": 28254, "loss": 0.7542, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.018317404088546e-05, "epoch": 0.58, "percentage": 29.23, "elapsed_time": "14:17:34", "remaining_time": "1 day, 10:35:49"} +{"current_steps": 8270, "total_steps": 28254, "loss": 0.7609, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.016108066681494e-05, "epoch": 0.59, "percentage": 29.27, "elapsed_time": "14:18:36", "remaining_time": "1 day, 10:34:47"} +{"current_steps": 8280, "total_steps": 28254, "loss": 0.7676, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0138968548412006e-05, "epoch": 0.59, "percentage": 29.31, "elapsed_time": "14:19:38", "remaining_time": "1 day, 10:33:44"} +{"current_steps": 8290, "total_steps": 28254, "loss": 0.7197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.011683771301486e-05, "epoch": 0.59, "percentage": 29.34, "elapsed_time": "14:20:41", "remaining_time": "1 day, 10:32:42"} +{"current_steps": 8300, "total_steps": 28254, "loss": 0.7711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.009468818798488e-05, "epoch": 0.59, "percentage": 29.38, "elapsed_time": "14:21:45", "remaining_time": "1 day, 10:31:45"} +{"current_steps": 8310, "total_steps": 28254, "loss": 0.7477, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.007252000070653e-05, "epoch": 0.59, "percentage": 29.41, "elapsed_time": "14:22:48", "remaining_time": "1 day, 10:30:44"} +{"current_steps": 8320, "total_steps": 28254, "loss": 0.7677, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.005033317858734e-05, "epoch": 0.59, "percentage": 29.45, "elapsed_time": "14:23:51", "remaining_time": "1 day, 10:29:43"} +{"current_steps": 8330, "total_steps": 28254, "loss": 0.739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.002812774905788e-05, "epoch": 0.59, "percentage": 29.48, "elapsed_time": "14:24:54", "remaining_time": "1 day, 10:28:44"} +{"current_steps": 8340, "total_steps": 28254, "loss": 0.7243, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0005903739571725e-05, "epoch": 0.59, "percentage": 29.52, "elapsed_time": "14:25:56", "remaining_time": "1 day, 10:27:39"} +{"current_steps": 8350, "total_steps": 28254, "loss": 0.7648, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.998366117760545e-05, "epoch": 0.59, "percentage": 29.55, "elapsed_time": "14:26:58", "remaining_time": "1 day, 10:26:37"} +{"current_steps": 8360, "total_steps": 28254, "loss": 0.721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9961400090658526e-05, "epoch": 0.59, "percentage": 29.59, "elapsed_time": "14:28:00", "remaining_time": "1 day, 10:25:33"} +{"current_steps": 8370, "total_steps": 28254, "loss": 0.7516, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.993912050625336e-05, "epoch": 0.59, "percentage": 29.62, "elapsed_time": "14:29:01", "remaining_time": "1 day, 10:24:28"} +{"current_steps": 8380, "total_steps": 28254, "loss": 0.7644, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.991682245193519e-05, "epoch": 0.59, "percentage": 29.66, "elapsed_time": "14:30:03", "remaining_time": "1 day, 10:23:25"} +{"current_steps": 8390, "total_steps": 28254, "loss": 0.7364, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.989450595527214e-05, "epoch": 0.59, "percentage": 29.69, "elapsed_time": "14:31:08", "remaining_time": "1 day, 10:22:29"} +{"current_steps": 8400, "total_steps": 28254, "loss": 0.7517, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.987217104385509e-05, "epoch": 0.59, "percentage": 29.73, "elapsed_time": "14:32:13", "remaining_time": "1 day, 10:21:33"} +{"current_steps": 8410, "total_steps": 28254, "loss": 0.7686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.984981774529771e-05, "epoch": 0.6, "percentage": 29.77, "elapsed_time": "14:33:16", "remaining_time": "1 day, 10:20:32"} +{"current_steps": 8420, "total_steps": 28254, "loss": 0.7526, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.982744608723641e-05, "epoch": 0.6, "percentage": 29.8, "elapsed_time": "14:34:17", "remaining_time": "1 day, 10:19:28"} +{"current_steps": 8430, "total_steps": 28254, "loss": 0.7468, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.980505609733027e-05, "epoch": 0.6, "percentage": 29.84, "elapsed_time": "14:35:20", "remaining_time": "1 day, 10:18:27"} +{"current_steps": 8440, "total_steps": 28254, "loss": 0.7765, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.978264780326105e-05, "epoch": 0.6, "percentage": 29.87, "elapsed_time": "14:36:23", "remaining_time": "1 day, 10:17:26"} +{"current_steps": 8450, "total_steps": 28254, "loss": 0.7367, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.976022123273316e-05, "epoch": 0.6, "percentage": 29.91, "elapsed_time": "14:37:26", "remaining_time": "1 day, 10:16:26"} +{"current_steps": 8460, "total_steps": 28254, "loss": 0.732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.973777641347357e-05, "epoch": 0.6, "percentage": 29.94, "elapsed_time": "14:38:28", "remaining_time": "1 day, 10:15:23"} +{"current_steps": 8470, "total_steps": 28254, "loss": 0.7508, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.971531337323183e-05, "epoch": 0.6, "percentage": 29.98, "elapsed_time": "14:39:30", "remaining_time": "1 day, 10:14:19"} +{"current_steps": 8480, "total_steps": 28254, "loss": 0.739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.969283213978003e-05, "epoch": 0.6, "percentage": 30.01, "elapsed_time": "14:40:32", "remaining_time": "1 day, 10:13:17"} +{"current_steps": 8490, "total_steps": 28254, "loss": 0.7511, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.967033274091273e-05, "epoch": 0.6, "percentage": 30.05, "elapsed_time": "14:41:37", "remaining_time": "1 day, 10:12:21"} +{"current_steps": 8500, "total_steps": 28254, "loss": 0.7497, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.964781520444696e-05, "epoch": 0.6, "percentage": 30.08, "elapsed_time": "14:42:39", "remaining_time": "1 day, 10:11:17"} +{"current_steps": 8510, "total_steps": 28254, "loss": 0.7393, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.962527955822217e-05, "epoch": 0.6, "percentage": 30.12, "elapsed_time": "14:43:42", "remaining_time": "1 day, 10:10:16"} +{"current_steps": 8520, "total_steps": 28254, "loss": 0.7489, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.96027258301002e-05, "epoch": 0.6, "percentage": 30.16, "elapsed_time": "14:44:44", "remaining_time": "1 day, 10:09:14"} +{"current_steps": 8530, "total_steps": 28254, "loss": 0.7484, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.958015404796526e-05, "epoch": 0.6, "percentage": 30.19, "elapsed_time": "14:45:48", "remaining_time": "1 day, 10:08:15"} +{"current_steps": 8540, "total_steps": 28254, "loss": 0.7324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.955756423972385e-05, "epoch": 0.6, "percentage": 30.23, "elapsed_time": "14:46:51", "remaining_time": "1 day, 10:07:14"} +{"current_steps": 8550, "total_steps": 28254, "loss": 0.7289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9534956433304806e-05, "epoch": 0.61, "percentage": 30.26, "elapsed_time": "14:47:53", "remaining_time": "1 day, 10:06:11"} +{"current_steps": 8560, "total_steps": 28254, "loss": 0.7621, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9512330656659155e-05, "epoch": 0.61, "percentage": 30.3, "elapsed_time": "14:48:54", "remaining_time": "1 day, 10:05:07"} +{"current_steps": 8570, "total_steps": 28254, "loss": 0.7426, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9489686937760195e-05, "epoch": 0.61, "percentage": 30.33, "elapsed_time": "14:49:57", "remaining_time": "1 day, 10:04:05"} +{"current_steps": 8580, "total_steps": 28254, "loss": 0.7531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.946702530460337e-05, "epoch": 0.61, "percentage": 30.37, "elapsed_time": "14:51:02", "remaining_time": "1 day, 10:03:09"} +{"current_steps": 8590, "total_steps": 28254, "loss": 0.7292, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9444345785206285e-05, "epoch": 0.61, "percentage": 30.4, "elapsed_time": "14:52:04", "remaining_time": "1 day, 10:02:06"} +{"current_steps": 8600, "total_steps": 28254, "loss": 0.7191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.942164840760866e-05, "epoch": 0.61, "percentage": 30.44, "elapsed_time": "14:53:07", "remaining_time": "1 day, 10:01:05"} +{"current_steps": 8610, "total_steps": 28254, "loss": 0.7325, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.93989331998723e-05, "epoch": 0.61, "percentage": 30.47, "elapsed_time": "14:54:09", "remaining_time": "1 day, 10:00:03"} +{"current_steps": 8620, "total_steps": 28254, "loss": 0.7309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.937620019008105e-05, "epoch": 0.61, "percentage": 30.51, "elapsed_time": "14:55:11", "remaining_time": "1 day, 9:59:00"} +{"current_steps": 8630, "total_steps": 28254, "loss": 0.7346, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9353449406340755e-05, "epoch": 0.61, "percentage": 30.54, "elapsed_time": "14:56:12", "remaining_time": "1 day, 9:57:54"} +{"current_steps": 8640, "total_steps": 28254, "loss": 0.7604, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.933068087677924e-05, "epoch": 0.61, "percentage": 30.58, "elapsed_time": "14:57:16", "remaining_time": "1 day, 9:56:56"} +{"current_steps": 8650, "total_steps": 28254, "loss": 0.7602, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.930789462954628e-05, "epoch": 0.61, "percentage": 30.62, "elapsed_time": "14:58:19", "remaining_time": "1 day, 9:55:54"} +{"current_steps": 8660, "total_steps": 28254, "loss": 0.7238, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9285090692813544e-05, "epoch": 0.61, "percentage": 30.65, "elapsed_time": "14:59:21", "remaining_time": "1 day, 9:54:51"} +{"current_steps": 8670, "total_steps": 28254, "loss": 0.7481, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9262269094774564e-05, "epoch": 0.61, "percentage": 30.69, "elapsed_time": "15:00:25", "remaining_time": "1 day, 9:53:53"} +{"current_steps": 8680, "total_steps": 28254, "loss": 0.7412, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9239429863644736e-05, "epoch": 0.61, "percentage": 30.72, "elapsed_time": "15:01:26", "remaining_time": "1 day, 9:52:48"} +{"current_steps": 8690, "total_steps": 28254, "loss": 0.7643, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.921657302766123e-05, "epoch": 0.62, "percentage": 30.76, "elapsed_time": "15:02:28", "remaining_time": "1 day, 9:51:46"} +{"current_steps": 8700, "total_steps": 28254, "loss": 0.7115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9193698615082995e-05, "epoch": 0.62, "percentage": 30.79, "elapsed_time": "15:03:30", "remaining_time": "1 day, 9:50:43"} +{"current_steps": 8710, "total_steps": 28254, "loss": 0.77, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9170806654190695e-05, "epoch": 0.62, "percentage": 30.83, "elapsed_time": "15:04:34", "remaining_time": "1 day, 9:49:43"} +{"current_steps": 8720, "total_steps": 28254, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.914789717328671e-05, "epoch": 0.62, "percentage": 30.86, "elapsed_time": "15:05:35", "remaining_time": "1 day, 9:48:39"} +{"current_steps": 8730, "total_steps": 28254, "loss": 0.7337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.912497020069505e-05, "epoch": 0.62, "percentage": 30.9, "elapsed_time": "15:06:37", "remaining_time": "1 day, 9:47:35"} +{"current_steps": 8740, "total_steps": 28254, "loss": 0.7589, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.910202576476142e-05, "epoch": 0.62, "percentage": 30.93, "elapsed_time": "15:07:44", "remaining_time": "1 day, 9:46:43"} +{"current_steps": 8750, "total_steps": 28254, "loss": 0.733, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.907906389385302e-05, "epoch": 0.62, "percentage": 30.97, "elapsed_time": "15:08:46", "remaining_time": "1 day, 9:45:40"} +{"current_steps": 8760, "total_steps": 28254, "loss": 0.7525, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9056084616358666e-05, "epoch": 0.62, "percentage": 31.0, "elapsed_time": "15:09:47", "remaining_time": "1 day, 9:44:36"} +{"current_steps": 8770, "total_steps": 28254, "loss": 0.7483, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.90330879606887e-05, "epoch": 0.62, "percentage": 31.04, "elapsed_time": "15:10:50", "remaining_time": "1 day, 9:43:35"} +{"current_steps": 8780, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9010073955274915e-05, "epoch": 0.62, "percentage": 31.08, "elapsed_time": "15:11:54", "remaining_time": "1 day, 9:42:36"} +{"current_steps": 8790, "total_steps": 28254, "loss": 0.7235, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.898704262857057e-05, "epoch": 0.62, "percentage": 31.11, "elapsed_time": "15:12:57", "remaining_time": "1 day, 9:41:34"} +{"current_steps": 8800, "total_steps": 28254, "loss": 0.7327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8963994009050356e-05, "epoch": 0.62, "percentage": 31.15, "elapsed_time": "15:13:59", "remaining_time": "1 day, 9:40:32"} +{"current_steps": 8810, "total_steps": 28254, "loss": 0.7502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.894092812521031e-05, "epoch": 0.62, "percentage": 31.18, "elapsed_time": "15:15:00", "remaining_time": "1 day, 9:39:27"} +{"current_steps": 8820, "total_steps": 28254, "loss": 0.7344, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.891784500556784e-05, "epoch": 0.62, "percentage": 31.22, "elapsed_time": "15:16:03", "remaining_time": "1 day, 9:38:26"} +{"current_steps": 8830, "total_steps": 28254, "loss": 0.7401, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8894744678661655e-05, "epoch": 0.63, "percentage": 31.25, "elapsed_time": "15:17:05", "remaining_time": "1 day, 9:37:24"} +{"current_steps": 8840, "total_steps": 28254, "loss": 0.7561, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.887162717305173e-05, "epoch": 0.63, "percentage": 31.29, "elapsed_time": "15:18:07", "remaining_time": "1 day, 9:36:20"} +{"current_steps": 8850, "total_steps": 28254, "loss": 0.7565, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.88484925173193e-05, "epoch": 0.63, "percentage": 31.32, "elapsed_time": "15:19:08", "remaining_time": "1 day, 9:35:15"} +{"current_steps": 8860, "total_steps": 28254, "loss": 0.7528, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.882534074006678e-05, "epoch": 0.63, "percentage": 31.36, "elapsed_time": "15:20:11", "remaining_time": "1 day, 9:34:13"} +{"current_steps": 8870, "total_steps": 28254, "loss": 0.7342, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8802171869917765e-05, "epoch": 0.63, "percentage": 31.39, "elapsed_time": "15:21:13", "remaining_time": "1 day, 9:33:12"} +{"current_steps": 8880, "total_steps": 28254, "loss": 0.7542, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8778985935516985e-05, "epoch": 0.63, "percentage": 31.43, "elapsed_time": "15:22:17", "remaining_time": "1 day, 9:32:12"} +{"current_steps": 8890, "total_steps": 28254, "loss": 0.7435, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8755782965530265e-05, "epoch": 0.63, "percentage": 31.46, "elapsed_time": "15:23:18", "remaining_time": "1 day, 9:31:07"} +{"current_steps": 8900, "total_steps": 28254, "loss": 0.7558, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.873256298864448e-05, "epoch": 0.63, "percentage": 31.5, "elapsed_time": "15:24:21", "remaining_time": "1 day, 9:30:07"} +{"current_steps": 8910, "total_steps": 28254, "loss": 0.7552, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.870932603356755e-05, "epoch": 0.63, "percentage": 31.54, "elapsed_time": "15:25:25", "remaining_time": "1 day, 9:29:07"} +{"current_steps": 8920, "total_steps": 28254, "loss": 0.7223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8686072129028385e-05, "epoch": 0.63, "percentage": 31.57, "elapsed_time": "15:26:26", "remaining_time": "1 day, 9:28:03"} +{"current_steps": 8930, "total_steps": 28254, "loss": 0.7385, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.866280130377682e-05, "epoch": 0.63, "percentage": 31.61, "elapsed_time": "15:27:28", "remaining_time": "1 day, 9:27:00"} +{"current_steps": 8940, "total_steps": 28254, "loss": 0.7372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8639513586583656e-05, "epoch": 0.63, "percentage": 31.64, "elapsed_time": "15:28:31", "remaining_time": "1 day, 9:26:00"} +{"current_steps": 8950, "total_steps": 28254, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.861620900624054e-05, "epoch": 0.63, "percentage": 31.68, "elapsed_time": "15:29:33", "remaining_time": "1 day, 9:24:56"} +{"current_steps": 8960, "total_steps": 28254, "loss": 0.7633, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.859288759156e-05, "epoch": 0.63, "percentage": 31.71, "elapsed_time": "15:30:33", "remaining_time": "1 day, 9:23:48"} +{"current_steps": 8970, "total_steps": 28254, "loss": 0.7412, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8569549371375346e-05, "epoch": 0.63, "percentage": 31.75, "elapsed_time": "15:31:38", "remaining_time": "1 day, 9:22:51"} +{"current_steps": 8980, "total_steps": 28254, "loss": 0.7195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.854619437454068e-05, "epoch": 0.64, "percentage": 31.78, "elapsed_time": "15:32:41", "remaining_time": "1 day, 9:21:52"} +{"current_steps": 8990, "total_steps": 28254, "loss": 0.7281, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8522822629930844e-05, "epoch": 0.64, "percentage": 31.82, "elapsed_time": "15:33:43", "remaining_time": "1 day, 9:20:49"} +{"current_steps": 9000, "total_steps": 28254, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.849943416644139e-05, "epoch": 0.64, "percentage": 31.85, "elapsed_time": "15:34:44", "remaining_time": "1 day, 9:19:43"} +{"current_steps": 9010, "total_steps": 28254, "loss": 0.7543, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.847602901298854e-05, "epoch": 0.64, "percentage": 31.89, "elapsed_time": "15:35:47", "remaining_time": "1 day, 9:18:42"} +{"current_steps": 9020, "total_steps": 28254, "loss": 0.7569, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.845260719850915e-05, "epoch": 0.64, "percentage": 31.92, "elapsed_time": "15:36:50", "remaining_time": "1 day, 9:17:42"} +{"current_steps": 9030, "total_steps": 28254, "loss": 0.7212, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.842916875196066e-05, "epoch": 0.64, "percentage": 31.96, "elapsed_time": "15:37:51", "remaining_time": "1 day, 9:16:36"} +{"current_steps": 9040, "total_steps": 28254, "loss": 0.734, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.84057137023211e-05, "epoch": 0.64, "percentage": 32.0, "elapsed_time": "15:38:54", "remaining_time": "1 day, 9:15:35"} +{"current_steps": 9050, "total_steps": 28254, "loss": 0.7038, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8382242078589006e-05, "epoch": 0.64, "percentage": 32.03, "elapsed_time": "15:39:56", "remaining_time": "1 day, 9:14:32"} +{"current_steps": 9060, "total_steps": 28254, "loss": 0.7444, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8358753909783405e-05, "epoch": 0.64, "percentage": 32.07, "elapsed_time": "15:41:00", "remaining_time": "1 day, 9:13:33"} +{"current_steps": 9070, "total_steps": 28254, "loss": 0.7663, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.83352492249438e-05, "epoch": 0.64, "percentage": 32.1, "elapsed_time": "15:42:02", "remaining_time": "1 day, 9:12:31"} +{"current_steps": 9080, "total_steps": 28254, "loss": 0.7659, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.831172805313009e-05, "epoch": 0.64, "percentage": 32.14, "elapsed_time": "15:43:04", "remaining_time": "1 day, 9:11:27"} +{"current_steps": 9090, "total_steps": 28254, "loss": 0.7406, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8288190423422585e-05, "epoch": 0.64, "percentage": 32.17, "elapsed_time": "15:44:06", "remaining_time": "1 day, 9:10:25"} +{"current_steps": 9100, "total_steps": 28254, "loss": 0.7292, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8264636364921904e-05, "epoch": 0.64, "percentage": 32.21, "elapsed_time": "15:45:09", "remaining_time": "1 day, 9:09:24"} +{"current_steps": 9110, "total_steps": 28254, "loss": 0.7383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.824106590674901e-05, "epoch": 0.64, "percentage": 32.24, "elapsed_time": "15:46:11", "remaining_time": "1 day, 9:08:20"} +{"current_steps": 9120, "total_steps": 28254, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.821747907804513e-05, "epoch": 0.65, "percentage": 32.28, "elapsed_time": "15:47:12", "remaining_time": "1 day, 9:07:15"} +{"current_steps": 9130, "total_steps": 28254, "loss": 0.7535, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.819387590797172e-05, "epoch": 0.65, "percentage": 32.31, "elapsed_time": "15:48:13", "remaining_time": "1 day, 9:06:11"} +{"current_steps": 9140, "total_steps": 28254, "loss": 0.7512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.817025642571046e-05, "epoch": 0.65, "percentage": 32.35, "elapsed_time": "15:49:16", "remaining_time": "1 day, 9:05:10"} +{"current_steps": 9150, "total_steps": 28254, "loss": 0.7285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.814662066046319e-05, "epoch": 0.65, "percentage": 32.38, "elapsed_time": "15:50:19", "remaining_time": "1 day, 9:04:08"} +{"current_steps": 9160, "total_steps": 28254, "loss": 0.7604, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.81229686414519e-05, "epoch": 0.65, "percentage": 32.42, "elapsed_time": "15:51:20", "remaining_time": "1 day, 9:03:04"} +{"current_steps": 9170, "total_steps": 28254, "loss": 0.7449, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8099300397918606e-05, "epoch": 0.65, "percentage": 32.46, "elapsed_time": "15:52:21", "remaining_time": "1 day, 9:02:00"} +{"current_steps": 9180, "total_steps": 28254, "loss": 0.7395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8075615959125465e-05, "epoch": 0.65, "percentage": 32.49, "elapsed_time": "15:53:22", "remaining_time": "1 day, 9:00:54"} +{"current_steps": 9190, "total_steps": 28254, "loss": 0.7444, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.805191535435463e-05, "epoch": 0.65, "percentage": 32.53, "elapsed_time": "15:54:25", "remaining_time": "1 day, 8:59:52"} +{"current_steps": 9200, "total_steps": 28254, "loss": 0.7471, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.802819861290822e-05, "epoch": 0.65, "percentage": 32.56, "elapsed_time": "15:55:28", "remaining_time": "1 day, 8:58:52"} +{"current_steps": 9210, "total_steps": 28254, "loss": 0.7874, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.800446576410831e-05, "epoch": 0.65, "percentage": 32.6, "elapsed_time": "15:56:31", "remaining_time": "1 day, 8:57:51"} +{"current_steps": 9220, "total_steps": 28254, "loss": 0.7581, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7980716837296924e-05, "epoch": 0.65, "percentage": 32.63, "elapsed_time": "15:57:35", "remaining_time": "1 day, 8:56:52"} +{"current_steps": 9230, "total_steps": 28254, "loss": 0.7719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.795695186183592e-05, "epoch": 0.65, "percentage": 32.67, "elapsed_time": "15:58:36", "remaining_time": "1 day, 8:55:47"} +{"current_steps": 9240, "total_steps": 28254, "loss": 0.7324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.793317086710703e-05, "epoch": 0.65, "percentage": 32.7, "elapsed_time": "15:59:40", "remaining_time": "1 day, 8:54:48"} +{"current_steps": 9250, "total_steps": 28254, "loss": 0.752, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.790937388251176e-05, "epoch": 0.65, "percentage": 32.74, "elapsed_time": "16:00:42", "remaining_time": "1 day, 8:53:45"} +{"current_steps": 9260, "total_steps": 28254, "loss": 0.7395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.788556093747142e-05, "epoch": 0.66, "percentage": 32.77, "elapsed_time": "16:01:45", "remaining_time": "1 day, 8:52:43"} +{"current_steps": 9270, "total_steps": 28254, "loss": 0.7337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7861732061427024e-05, "epoch": 0.66, "percentage": 32.81, "elapsed_time": "16:02:44", "remaining_time": "1 day, 8:51:36"} +{"current_steps": 9280, "total_steps": 28254, "loss": 0.7559, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.783788728383929e-05, "epoch": 0.66, "percentage": 32.84, "elapsed_time": "16:03:45", "remaining_time": "1 day, 8:50:30"} +{"current_steps": 9290, "total_steps": 28254, "loss": 0.7456, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7814026634188616e-05, "epoch": 0.66, "percentage": 32.88, "elapsed_time": "16:04:50", "remaining_time": "1 day, 8:49:33"} +{"current_steps": 9300, "total_steps": 28254, "loss": 0.7293, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.779015014197499e-05, "epoch": 0.66, "percentage": 32.92, "elapsed_time": "16:05:54", "remaining_time": "1 day, 8:48:34"} +{"current_steps": 9310, "total_steps": 28254, "loss": 0.7386, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.776625783671802e-05, "epoch": 0.66, "percentage": 32.95, "elapsed_time": "16:06:55", "remaining_time": "1 day, 8:47:30"} +{"current_steps": 9320, "total_steps": 28254, "loss": 0.711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.774234974795683e-05, "epoch": 0.66, "percentage": 32.99, "elapsed_time": "16:07:59", "remaining_time": "1 day, 8:46:32"} +{"current_steps": 9330, "total_steps": 28254, "loss": 0.7369, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.771842590525008e-05, "epoch": 0.66, "percentage": 33.02, "elapsed_time": "16:09:00", "remaining_time": "1 day, 8:45:25"} +{"current_steps": 9340, "total_steps": 28254, "loss": 0.7446, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.769448633817591e-05, "epoch": 0.66, "percentage": 33.06, "elapsed_time": "16:10:04", "remaining_time": "1 day, 8:44:27"} +{"current_steps": 9350, "total_steps": 28254, "loss": 0.7554, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7670531076331895e-05, "epoch": 0.66, "percentage": 33.09, "elapsed_time": "16:11:08", "remaining_time": "1 day, 8:43:29"} +{"current_steps": 9360, "total_steps": 28254, "loss": 0.7632, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7646560149334995e-05, "epoch": 0.66, "percentage": 33.13, "elapsed_time": "16:12:10", "remaining_time": "1 day, 8:42:24"} +{"current_steps": 9370, "total_steps": 28254, "loss": 0.7249, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.762257358682158e-05, "epoch": 0.66, "percentage": 33.16, "elapsed_time": "16:13:13", "remaining_time": "1 day, 8:41:23"} +{"current_steps": 9380, "total_steps": 28254, "loss": 0.7343, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.759857141844732e-05, "epoch": 0.66, "percentage": 33.2, "elapsed_time": "16:14:17", "remaining_time": "1 day, 8:40:25"} +{"current_steps": 9390, "total_steps": 28254, "loss": 0.747, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7574553673887164e-05, "epoch": 0.66, "percentage": 33.23, "elapsed_time": "16:15:19", "remaining_time": "1 day, 8:39:23"} +{"current_steps": 9400, "total_steps": 28254, "loss": 0.7378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7550520382835365e-05, "epoch": 0.67, "percentage": 33.27, "elapsed_time": "16:16:22", "remaining_time": "1 day, 8:38:22"} +{"current_steps": 9410, "total_steps": 28254, "loss": 0.7587, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.752647157500536e-05, "epoch": 0.67, "percentage": 33.31, "elapsed_time": "16:17:25", "remaining_time": "1 day, 8:37:19"} +{"current_steps": 9420, "total_steps": 28254, "loss": 0.7305, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.750240728012979e-05, "epoch": 0.67, "percentage": 33.34, "elapsed_time": "16:18:27", "remaining_time": "1 day, 8:36:17"} +{"current_steps": 9430, "total_steps": 28254, "loss": 0.7188, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7478327527960424e-05, "epoch": 0.67, "percentage": 33.38, "elapsed_time": "16:19:30", "remaining_time": "1 day, 8:35:17"} +{"current_steps": 9440, "total_steps": 28254, "loss": 0.7295, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.745423234826817e-05, "epoch": 0.67, "percentage": 33.41, "elapsed_time": "16:20:33", "remaining_time": "1 day, 8:34:15"} +{"current_steps": 9450, "total_steps": 28254, "loss": 0.7137, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7430121770842974e-05, "epoch": 0.67, "percentage": 33.45, "elapsed_time": "16:21:36", "remaining_time": "1 day, 8:33:15"} +{"current_steps": 9460, "total_steps": 28254, "loss": 0.7619, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7405995825493855e-05, "epoch": 0.67, "percentage": 33.48, "elapsed_time": "16:22:39", "remaining_time": "1 day, 8:32:13"} +{"current_steps": 9470, "total_steps": 28254, "loss": 0.7388, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.73818545420488e-05, "epoch": 0.67, "percentage": 33.52, "elapsed_time": "16:23:41", "remaining_time": "1 day, 8:31:11"} +{"current_steps": 9480, "total_steps": 28254, "loss": 0.7496, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.735769795035477e-05, "epoch": 0.67, "percentage": 33.55, "elapsed_time": "16:24:42", "remaining_time": "1 day, 8:30:06"} +{"current_steps": 9490, "total_steps": 28254, "loss": 0.7716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.733352608027768e-05, "epoch": 0.67, "percentage": 33.59, "elapsed_time": "16:25:46", "remaining_time": "1 day, 8:29:06"} +{"current_steps": 9500, "total_steps": 28254, "loss": 0.7513, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.730933896170229e-05, "epoch": 0.67, "percentage": 33.62, "elapsed_time": "16:26:47", "remaining_time": "1 day, 8:28:02"} +{"current_steps": 9510, "total_steps": 28254, "loss": 0.7472, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7285136624532244e-05, "epoch": 0.67, "percentage": 33.66, "elapsed_time": "16:27:51", "remaining_time": "1 day, 8:27:02"} +{"current_steps": 9520, "total_steps": 28254, "loss": 0.726, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.726091909868998e-05, "epoch": 0.67, "percentage": 33.69, "elapsed_time": "16:28:54", "remaining_time": "1 day, 8:26:02"} +{"current_steps": 9530, "total_steps": 28254, "loss": 0.728, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7236686414116736e-05, "epoch": 0.67, "percentage": 33.73, "elapsed_time": "16:29:54", "remaining_time": "1 day, 8:24:54"} +{"current_steps": 9540, "total_steps": 28254, "loss": 0.7283, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.721243860077247e-05, "epoch": 0.68, "percentage": 33.77, "elapsed_time": "16:30:57", "remaining_time": "1 day, 8:23:53"} +{"current_steps": 9550, "total_steps": 28254, "loss": 0.7674, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.718817568863586e-05, "epoch": 0.68, "percentage": 33.8, "elapsed_time": "16:32:01", "remaining_time": "1 day, 8:22:55"} +{"current_steps": 9560, "total_steps": 28254, "loss": 0.738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7163897707704244e-05, "epoch": 0.68, "percentage": 33.84, "elapsed_time": "16:33:02", "remaining_time": "1 day, 8:21:50"} +{"current_steps": 9570, "total_steps": 28254, "loss": 0.7461, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.71396046879936e-05, "epoch": 0.68, "percentage": 33.87, "elapsed_time": "16:34:04", "remaining_time": "1 day, 8:20:47"} +{"current_steps": 9580, "total_steps": 28254, "loss": 0.7427, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.711529665953847e-05, "epoch": 0.68, "percentage": 33.91, "elapsed_time": "16:35:07", "remaining_time": "1 day, 8:19:46"} +{"current_steps": 9590, "total_steps": 28254, "loss": 0.7268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7090973652392e-05, "epoch": 0.68, "percentage": 33.94, "elapsed_time": "16:36:09", "remaining_time": "1 day, 8:18:43"} +{"current_steps": 9600, "total_steps": 28254, "loss": 0.7508, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.706663569662581e-05, "epoch": 0.68, "percentage": 33.98, "elapsed_time": "16:37:12", "remaining_time": "1 day, 8:17:42"} +{"current_steps": 9610, "total_steps": 28254, "loss": 0.7623, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.704228282233003e-05, "epoch": 0.68, "percentage": 34.01, "elapsed_time": "16:38:13", "remaining_time": "1 day, 8:16:36"} +{"current_steps": 9620, "total_steps": 28254, "loss": 0.7626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7017915059613214e-05, "epoch": 0.68, "percentage": 34.05, "elapsed_time": "16:39:16", "remaining_time": "1 day, 8:15:35"} +{"current_steps": 9630, "total_steps": 28254, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.699353243860235e-05, "epoch": 0.68, "percentage": 34.08, "elapsed_time": "16:40:18", "remaining_time": "1 day, 8:14:32"} +{"current_steps": 9640, "total_steps": 28254, "loss": 0.7422, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.696913498944276e-05, "epoch": 0.68, "percentage": 34.12, "elapsed_time": "16:41:19", "remaining_time": "1 day, 8:13:28"} +{"current_steps": 9650, "total_steps": 28254, "loss": 0.7552, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6944722742298135e-05, "epoch": 0.68, "percentage": 34.15, "elapsed_time": "16:42:21", "remaining_time": "1 day, 8:12:24"} +{"current_steps": 9660, "total_steps": 28254, "loss": 0.6867, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.692029572735042e-05, "epoch": 0.68, "percentage": 34.19, "elapsed_time": "16:43:25", "remaining_time": "1 day, 8:11:26"} +{"current_steps": 9670, "total_steps": 28254, "loss": 0.7644, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6895853974799876e-05, "epoch": 0.68, "percentage": 34.23, "elapsed_time": "16:44:28", "remaining_time": "1 day, 8:10:26"} +{"current_steps": 9680, "total_steps": 28254, "loss": 0.7547, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6871397514864924e-05, "epoch": 0.69, "percentage": 34.26, "elapsed_time": "16:45:33", "remaining_time": "1 day, 8:09:27"} +{"current_steps": 9690, "total_steps": 28254, "loss": 0.7313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6846926377782216e-05, "epoch": 0.69, "percentage": 34.3, "elapsed_time": "16:46:36", "remaining_time": "1 day, 8:08:26"} +{"current_steps": 9700, "total_steps": 28254, "loss": 0.7643, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.682244059380651e-05, "epoch": 0.69, "percentage": 34.33, "elapsed_time": "16:47:38", "remaining_time": "1 day, 8:07:24"} +{"current_steps": 9710, "total_steps": 28254, "loss": 0.7561, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6797940193210714e-05, "epoch": 0.69, "percentage": 34.37, "elapsed_time": "16:48:39", "remaining_time": "1 day, 8:06:19"} +{"current_steps": 9720, "total_steps": 28254, "loss": 0.7326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6773425206285765e-05, "epoch": 0.69, "percentage": 34.4, "elapsed_time": "16:49:40", "remaining_time": "1 day, 8:05:14"} +{"current_steps": 9730, "total_steps": 28254, "loss": 0.7435, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.674889566334067e-05, "epoch": 0.69, "percentage": 34.44, "elapsed_time": "16:50:43", "remaining_time": "1 day, 8:04:13"} +{"current_steps": 9740, "total_steps": 28254, "loss": 0.7259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6724351594702404e-05, "epoch": 0.69, "percentage": 34.47, "elapsed_time": "16:51:45", "remaining_time": "1 day, 8:03:10"} +{"current_steps": 9750, "total_steps": 28254, "loss": 0.7106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6699793030715933e-05, "epoch": 0.69, "percentage": 34.51, "elapsed_time": "16:52:49", "remaining_time": "1 day, 8:02:10"} +{"current_steps": 9760, "total_steps": 28254, "loss": 0.7552, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.66752200017441e-05, "epoch": 0.69, "percentage": 34.54, "elapsed_time": "16:53:50", "remaining_time": "1 day, 8:01:05"} +{"current_steps": 9770, "total_steps": 28254, "loss": 0.7305, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6650632538167674e-05, "epoch": 0.69, "percentage": 34.58, "elapsed_time": "16:54:53", "remaining_time": "1 day, 8:00:05"} +{"current_steps": 9780, "total_steps": 28254, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.662603067038524e-05, "epoch": 0.69, "percentage": 34.61, "elapsed_time": "16:55:56", "remaining_time": "1 day, 7:59:04"} +{"current_steps": 9790, "total_steps": 28254, "loss": 0.7464, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.660141442881322e-05, "epoch": 0.69, "percentage": 34.65, "elapsed_time": "16:56:57", "remaining_time": "1 day, 7:57:58"} +{"current_steps": 9800, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.657678384388578e-05, "epoch": 0.69, "percentage": 34.69, "elapsed_time": "16:57:59", "remaining_time": "1 day, 7:56:57"} +{"current_steps": 9810, "total_steps": 28254, "loss": 0.7587, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.655213894605483e-05, "epoch": 0.69, "percentage": 34.72, "elapsed_time": "16:59:01", "remaining_time": "1 day, 7:55:54"} +{"current_steps": 9820, "total_steps": 28254, "loss": 0.7431, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.652747976578998e-05, "epoch": 0.7, "percentage": 34.76, "elapsed_time": "17:00:04", "remaining_time": "1 day, 7:54:52"} +{"current_steps": 9830, "total_steps": 28254, "loss": 0.7776, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.650280633357849e-05, "epoch": 0.7, "percentage": 34.79, "elapsed_time": "17:01:08", "remaining_time": "1 day, 7:53:52"} +{"current_steps": 9840, "total_steps": 28254, "loss": 0.7266, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6478118679925254e-05, "epoch": 0.7, "percentage": 34.83, "elapsed_time": "17:02:10", "remaining_time": "1 day, 7:52:50"} +{"current_steps": 9850, "total_steps": 28254, "loss": 0.7521, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6453416835352725e-05, "epoch": 0.7, "percentage": 34.86, "elapsed_time": "17:03:12", "remaining_time": "1 day, 7:51:47"} +{"current_steps": 9860, "total_steps": 28254, "loss": 0.7532, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.642870083040093e-05, "epoch": 0.7, "percentage": 34.9, "elapsed_time": "17:04:14", "remaining_time": "1 day, 7:50:44"} +{"current_steps": 9870, "total_steps": 28254, "loss": 0.7215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6403970695627384e-05, "epoch": 0.7, "percentage": 34.93, "elapsed_time": "17:05:17", "remaining_time": "1 day, 7:49:43"} +{"current_steps": 9880, "total_steps": 28254, "loss": 0.7475, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.637922646160706e-05, "epoch": 0.7, "percentage": 34.97, "elapsed_time": "17:06:20", "remaining_time": "1 day, 7:48:42"} +{"current_steps": 9890, "total_steps": 28254, "loss": 0.757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6354468158932395e-05, "epoch": 0.7, "percentage": 35.0, "elapsed_time": "17:07:22", "remaining_time": "1 day, 7:47:40"} +{"current_steps": 9900, "total_steps": 28254, "loss": 0.7066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.632969581821321e-05, "epoch": 0.7, "percentage": 35.04, "elapsed_time": "17:08:24", "remaining_time": "1 day, 7:46:36"} +{"current_steps": 9910, "total_steps": 28254, "loss": 0.7627, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6304909470076645e-05, "epoch": 0.7, "percentage": 35.07, "elapsed_time": "17:09:26", "remaining_time": "1 day, 7:45:33"} +{"current_steps": 9920, "total_steps": 28254, "loss": 0.7341, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.628010914516723e-05, "epoch": 0.7, "percentage": 35.11, "elapsed_time": "17:10:28", "remaining_time": "1 day, 7:44:29"} +{"current_steps": 9930, "total_steps": 28254, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6255294874146684e-05, "epoch": 0.7, "percentage": 35.15, "elapsed_time": "17:11:30", "remaining_time": "1 day, 7:43:27"} +{"current_steps": 9940, "total_steps": 28254, "loss": 0.7241, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6230466687694054e-05, "epoch": 0.7, "percentage": 35.18, "elapsed_time": "17:12:32", "remaining_time": "1 day, 7:42:25"} +{"current_steps": 9950, "total_steps": 28254, "loss": 0.7269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.620562461650553e-05, "epoch": 0.7, "percentage": 35.22, "elapsed_time": "17:13:34", "remaining_time": "1 day, 7:41:21"} +{"current_steps": 9960, "total_steps": 28254, "loss": 0.7487, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.618076869129452e-05, "epoch": 0.7, "percentage": 35.25, "elapsed_time": "17:14:36", "remaining_time": "1 day, 7:40:19"} +{"current_steps": 9970, "total_steps": 28254, "loss": 0.735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.61558989427915e-05, "epoch": 0.71, "percentage": 35.29, "elapsed_time": "17:15:38", "remaining_time": "1 day, 7:39:16"} +{"current_steps": 9980, "total_steps": 28254, "loss": 0.7476, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.61310154017441e-05, "epoch": 0.71, "percentage": 35.32, "elapsed_time": "17:16:39", "remaining_time": "1 day, 7:38:10"} +{"current_steps": 9990, "total_steps": 28254, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6106118098916954e-05, "epoch": 0.71, "percentage": 35.36, "elapsed_time": "17:17:42", "remaining_time": "1 day, 7:37:09"} +{"current_steps": 10000, "total_steps": 28254, "loss": 0.7288, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.608120706509173e-05, "epoch": 0.71, "percentage": 35.39, "elapsed_time": "17:18:41", "remaining_time": "1 day, 7:36:02"} +{"current_steps": 10010, "total_steps": 28254, "loss": 0.7491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.605628233106707e-05, "epoch": 0.71, "percentage": 35.43, "elapsed_time": "17:19:43", "remaining_time": "1 day, 7:34:58"} +{"current_steps": 10020, "total_steps": 28254, "loss": 0.7687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6031343927658564e-05, "epoch": 0.71, "percentage": 35.46, "elapsed_time": "17:20:44", "remaining_time": "1 day, 7:33:53"} +{"current_steps": 10030, "total_steps": 28254, "loss": 0.7579, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.600639188569868e-05, "epoch": 0.71, "percentage": 35.5, "elapsed_time": "17:21:44", "remaining_time": "1 day, 7:32:48"} +{"current_steps": 10040, "total_steps": 28254, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.598142623603676e-05, "epoch": 0.71, "percentage": 35.53, "elapsed_time": "17:22:46", "remaining_time": "1 day, 7:31:44"} +{"current_steps": 10050, "total_steps": 28254, "loss": 0.7501, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.595644700953898e-05, "epoch": 0.71, "percentage": 35.57, "elapsed_time": "17:23:49", "remaining_time": "1 day, 7:30:44"} +{"current_steps": 10060, "total_steps": 28254, "loss": 0.713, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5931454237088283e-05, "epoch": 0.71, "percentage": 35.61, "elapsed_time": "17:24:54", "remaining_time": "1 day, 7:29:45"} +{"current_steps": 10070, "total_steps": 28254, "loss": 0.735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.590644794958438e-05, "epoch": 0.71, "percentage": 35.64, "elapsed_time": "17:25:56", "remaining_time": "1 day, 7:28:43"} +{"current_steps": 10080, "total_steps": 28254, "loss": 0.7051, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5881428177943674e-05, "epoch": 0.71, "percentage": 35.68, "elapsed_time": "17:26:58", "remaining_time": "1 day, 7:27:41"} +{"current_steps": 10090, "total_steps": 28254, "loss": 0.75, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5856394953099234e-05, "epoch": 0.71, "percentage": 35.71, "elapsed_time": "17:27:59", "remaining_time": "1 day, 7:26:35"} +{"current_steps": 10100, "total_steps": 28254, "loss": 0.7514, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.583134830600079e-05, "epoch": 0.71, "percentage": 35.75, "elapsed_time": "17:29:01", "remaining_time": "1 day, 7:25:32"} +{"current_steps": 10110, "total_steps": 28254, "loss": 0.7233, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5806288267614636e-05, "epoch": 0.72, "percentage": 35.78, "elapsed_time": "17:30:03", "remaining_time": "1 day, 7:24:29"} +{"current_steps": 10120, "total_steps": 28254, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5781214868923633e-05, "epoch": 0.72, "percentage": 35.82, "elapsed_time": "17:31:05", "remaining_time": "1 day, 7:23:26"} +{"current_steps": 10130, "total_steps": 28254, "loss": 0.7144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.575612814092718e-05, "epoch": 0.72, "percentage": 35.85, "elapsed_time": "17:32:07", "remaining_time": "1 day, 7:22:24"} +{"current_steps": 10140, "total_steps": 28254, "loss": 0.7626, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5731028114641116e-05, "epoch": 0.72, "percentage": 35.89, "elapsed_time": "17:33:11", "remaining_time": "1 day, 7:21:24"} +{"current_steps": 10150, "total_steps": 28254, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.570591482109777e-05, "epoch": 0.72, "percentage": 35.92, "elapsed_time": "17:34:12", "remaining_time": "1 day, 7:20:19"} +{"current_steps": 10160, "total_steps": 28254, "loss": 0.737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.568078829134582e-05, "epoch": 0.72, "percentage": 35.96, "elapsed_time": "17:35:15", "remaining_time": "1 day, 7:19:18"} +{"current_steps": 10170, "total_steps": 28254, "loss": 0.7606, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5655648556450356e-05, "epoch": 0.72, "percentage": 35.99, "elapsed_time": "17:36:19", "remaining_time": "1 day, 7:18:20"} +{"current_steps": 10180, "total_steps": 28254, "loss": 0.7435, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.563049564749275e-05, "epoch": 0.72, "percentage": 36.03, "elapsed_time": "17:37:18", "remaining_time": "1 day, 7:17:11"} +{"current_steps": 10190, "total_steps": 28254, "loss": 0.7496, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5605329595570714e-05, "epoch": 0.72, "percentage": 36.07, "elapsed_time": "17:38:20", "remaining_time": "1 day, 7:16:08"} +{"current_steps": 10200, "total_steps": 28254, "loss": 0.7282, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.558015043179816e-05, "epoch": 0.72, "percentage": 36.1, "elapsed_time": "17:39:22", "remaining_time": "1 day, 7:15:05"} +{"current_steps": 10210, "total_steps": 28254, "loss": 0.7563, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.555495818730524e-05, "epoch": 0.72, "percentage": 36.14, "elapsed_time": "17:40:24", "remaining_time": "1 day, 7:14:03"} +{"current_steps": 10220, "total_steps": 28254, "loss": 0.7196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5529752893238264e-05, "epoch": 0.72, "percentage": 36.17, "elapsed_time": "17:41:25", "remaining_time": "1 day, 7:12:57"} +{"current_steps": 10230, "total_steps": 28254, "loss": 0.761, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5504534580759695e-05, "epoch": 0.72, "percentage": 36.21, "elapsed_time": "17:42:27", "remaining_time": "1 day, 7:11:54"} +{"current_steps": 10240, "total_steps": 28254, "loss": 0.7364, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.547930328104806e-05, "epoch": 0.72, "percentage": 36.24, "elapsed_time": "17:43:28", "remaining_time": "1 day, 7:10:51"} +{"current_steps": 10250, "total_steps": 28254, "loss": 0.7307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.545405902529797e-05, "epoch": 0.73, "percentage": 36.28, "elapsed_time": "17:44:32", "remaining_time": "1 day, 7:09:51"} +{"current_steps": 10260, "total_steps": 28254, "loss": 0.7517, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.542880184472004e-05, "epoch": 0.73, "percentage": 36.31, "elapsed_time": "17:45:35", "remaining_time": "1 day, 7:08:50"} +{"current_steps": 10270, "total_steps": 28254, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.540353177054088e-05, "epoch": 0.73, "percentage": 36.35, "elapsed_time": "17:46:35", "remaining_time": "1 day, 7:07:44"} +{"current_steps": 10280, "total_steps": 28254, "loss": 0.73, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5378248834003017e-05, "epoch": 0.73, "percentage": 36.38, "elapsed_time": "17:47:37", "remaining_time": "1 day, 7:06:41"} +{"current_steps": 10290, "total_steps": 28254, "loss": 0.7336, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.535295306636489e-05, "epoch": 0.73, "percentage": 36.42, "elapsed_time": "17:48:39", "remaining_time": "1 day, 7:05:38"} +{"current_steps": 10300, "total_steps": 28254, "loss": 0.7248, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5327644498900824e-05, "epoch": 0.73, "percentage": 36.46, "elapsed_time": "17:49:43", "remaining_time": "1 day, 7:04:38"} +{"current_steps": 10310, "total_steps": 28254, "loss": 0.7291, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.530232316290094e-05, "epoch": 0.73, "percentage": 36.49, "elapsed_time": "17:50:45", "remaining_time": "1 day, 7:03:36"} +{"current_steps": 10320, "total_steps": 28254, "loss": 0.7609, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5276989089671154e-05, "epoch": 0.73, "percentage": 36.53, "elapsed_time": "17:51:48", "remaining_time": "1 day, 7:02:34"} +{"current_steps": 10330, "total_steps": 28254, "loss": 0.7445, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5251642310533135e-05, "epoch": 0.73, "percentage": 36.56, "elapsed_time": "17:52:53", "remaining_time": "1 day, 7:01:36"} +{"current_steps": 10340, "total_steps": 28254, "loss": 0.7711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.522628285682425e-05, "epoch": 0.73, "percentage": 36.6, "elapsed_time": "17:53:56", "remaining_time": "1 day, 7:00:35"} +{"current_steps": 10350, "total_steps": 28254, "loss": 0.7469, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.520091075989755e-05, "epoch": 0.73, "percentage": 36.63, "elapsed_time": "17:55:00", "remaining_time": "1 day, 6:59:36"} +{"current_steps": 10360, "total_steps": 28254, "loss": 0.7453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.517552605112171e-05, "epoch": 0.73, "percentage": 36.67, "elapsed_time": "17:56:00", "remaining_time": "1 day, 6:58:30"} +{"current_steps": 10370, "total_steps": 28254, "loss": 0.726, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.515012876188099e-05, "epoch": 0.73, "percentage": 36.7, "elapsed_time": "17:57:03", "remaining_time": "1 day, 6:57:28"} +{"current_steps": 10380, "total_steps": 28254, "loss": 0.7439, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.512471892357522e-05, "epoch": 0.73, "percentage": 36.74, "elapsed_time": "17:58:04", "remaining_time": "1 day, 6:56:23"} +{"current_steps": 10390, "total_steps": 28254, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.509929656761973e-05, "epoch": 0.74, "percentage": 36.77, "elapsed_time": "17:59:05", "remaining_time": "1 day, 6:55:20"} +{"current_steps": 10400, "total_steps": 28254, "loss": 0.7795, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.507386172544534e-05, "epoch": 0.74, "percentage": 36.81, "elapsed_time": "18:00:08", "remaining_time": "1 day, 6:54:17"} +{"current_steps": 10410, "total_steps": 28254, "loss": 0.7389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.50484144284983e-05, "epoch": 0.74, "percentage": 36.84, "elapsed_time": "18:01:08", "remaining_time": "1 day, 6:53:12"} +{"current_steps": 10420, "total_steps": 28254, "loss": 0.7409, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.502295470824026e-05, "epoch": 0.74, "percentage": 36.88, "elapsed_time": "18:02:11", "remaining_time": "1 day, 6:52:10"} +{"current_steps": 10430, "total_steps": 28254, "loss": 0.7453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4997482596148215e-05, "epoch": 0.74, "percentage": 36.92, "elapsed_time": "18:03:14", "remaining_time": "1 day, 6:51:10"} +{"current_steps": 10440, "total_steps": 28254, "loss": 0.7331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.497199812371451e-05, "epoch": 0.74, "percentage": 36.95, "elapsed_time": "18:04:15", "remaining_time": "1 day, 6:50:05"} +{"current_steps": 10450, "total_steps": 28254, "loss": 0.7345, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4946501322446745e-05, "epoch": 0.74, "percentage": 36.99, "elapsed_time": "18:05:17", "remaining_time": "1 day, 6:49:02"} +{"current_steps": 10460, "total_steps": 28254, "loss": 0.7448, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4920992223867784e-05, "epoch": 0.74, "percentage": 37.02, "elapsed_time": "18:06:19", "remaining_time": "1 day, 6:48:00"} +{"current_steps": 10470, "total_steps": 28254, "loss": 0.7118, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.489547085951567e-05, "epoch": 0.74, "percentage": 37.06, "elapsed_time": "18:07:23", "remaining_time": "1 day, 6:47:00"} +{"current_steps": 10480, "total_steps": 28254, "loss": 0.741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.486993726094363e-05, "epoch": 0.74, "percentage": 37.09, "elapsed_time": "18:08:25", "remaining_time": "1 day, 6:45:57"} +{"current_steps": 10490, "total_steps": 28254, "loss": 0.708, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4844391459720014e-05, "epoch": 0.74, "percentage": 37.13, "elapsed_time": "18:09:29", "remaining_time": "1 day, 6:44:57"} +{"current_steps": 10500, "total_steps": 28254, "loss": 0.7703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.481883348742826e-05, "epoch": 0.74, "percentage": 37.16, "elapsed_time": "18:10:33", "remaining_time": "1 day, 6:43:58"} +{"current_steps": 10510, "total_steps": 28254, "loss": 0.7467, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.479326337566683e-05, "epoch": 0.74, "percentage": 37.2, "elapsed_time": "18:11:34", "remaining_time": "1 day, 6:42:54"} +{"current_steps": 10520, "total_steps": 28254, "loss": 0.7501, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4767681156049236e-05, "epoch": 0.74, "percentage": 37.23, "elapsed_time": "18:12:38", "remaining_time": "1 day, 6:41:54"} +{"current_steps": 10530, "total_steps": 28254, "loss": 0.764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4742086860203926e-05, "epoch": 0.75, "percentage": 37.27, "elapsed_time": "18:13:41", "remaining_time": "1 day, 6:40:54"} +{"current_steps": 10540, "total_steps": 28254, "loss": 0.7412, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.47164805197743e-05, "epoch": 0.75, "percentage": 37.3, "elapsed_time": "18:14:44", "remaining_time": "1 day, 6:39:52"} +{"current_steps": 10550, "total_steps": 28254, "loss": 0.7403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.469086216641863e-05, "epoch": 0.75, "percentage": 37.34, "elapsed_time": "18:15:47", "remaining_time": "1 day, 6:38:50"} +{"current_steps": 10560, "total_steps": 28254, "loss": 0.7317, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.466523183181005e-05, "epoch": 0.75, "percentage": 37.38, "elapsed_time": "18:16:48", "remaining_time": "1 day, 6:37:46"} +{"current_steps": 10570, "total_steps": 28254, "loss": 0.7539, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.463958954763652e-05, "epoch": 0.75, "percentage": 37.41, "elapsed_time": "18:17:51", "remaining_time": "1 day, 6:36:45"} +{"current_steps": 10580, "total_steps": 28254, "loss": 0.7554, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.461393534560073e-05, "epoch": 0.75, "percentage": 37.45, "elapsed_time": "18:18:55", "remaining_time": "1 day, 6:35:45"} +{"current_steps": 10590, "total_steps": 28254, "loss": 0.7161, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.458826925742017e-05, "epoch": 0.75, "percentage": 37.48, "elapsed_time": "18:19:58", "remaining_time": "1 day, 6:34:44"} +{"current_steps": 10600, "total_steps": 28254, "loss": 0.7023, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.456259131482696e-05, "epoch": 0.75, "percentage": 37.52, "elapsed_time": "18:21:00", "remaining_time": "1 day, 6:33:41"} +{"current_steps": 10610, "total_steps": 28254, "loss": 0.7644, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.453690154956793e-05, "epoch": 0.75, "percentage": 37.55, "elapsed_time": "18:22:04", "remaining_time": "1 day, 6:32:41"} +{"current_steps": 10620, "total_steps": 28254, "loss": 0.7552, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4511199993404496e-05, "epoch": 0.75, "percentage": 37.59, "elapsed_time": "18:23:05", "remaining_time": "1 day, 6:31:37"} +{"current_steps": 10630, "total_steps": 28254, "loss": 0.7156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.448548667811265e-05, "epoch": 0.75, "percentage": 37.62, "elapsed_time": "18:24:07", "remaining_time": "1 day, 6:30:35"} +{"current_steps": 10640, "total_steps": 28254, "loss": 0.7464, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.445976163548294e-05, "epoch": 0.75, "percentage": 37.66, "elapsed_time": "18:25:10", "remaining_time": "1 day, 6:29:33"} +{"current_steps": 10650, "total_steps": 28254, "loss": 0.7252, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.443402489732041e-05, "epoch": 0.75, "percentage": 37.69, "elapsed_time": "18:26:14", "remaining_time": "1 day, 6:28:34"} +{"current_steps": 10660, "total_steps": 28254, "loss": 0.7355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4408276495444534e-05, "epoch": 0.75, "percentage": 37.73, "elapsed_time": "18:27:17", "remaining_time": "1 day, 6:27:33"} +{"current_steps": 10670, "total_steps": 28254, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.438251646168926e-05, "epoch": 0.76, "percentage": 37.76, "elapsed_time": "18:28:17", "remaining_time": "1 day, 6:26:27"} +{"current_steps": 10680, "total_steps": 28254, "loss": 0.7544, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.435674482790287e-05, "epoch": 0.76, "percentage": 37.8, "elapsed_time": "18:29:21", "remaining_time": "1 day, 6:25:27"} +{"current_steps": 10690, "total_steps": 28254, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.433096162594801e-05, "epoch": 0.76, "percentage": 37.84, "elapsed_time": "18:30:24", "remaining_time": "1 day, 6:24:25"} +{"current_steps": 10700, "total_steps": 28254, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.430516688770161e-05, "epoch": 0.76, "percentage": 37.87, "elapsed_time": "18:31:26", "remaining_time": "1 day, 6:23:22"} +{"current_steps": 10710, "total_steps": 28254, "loss": 0.7235, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4279360645054905e-05, "epoch": 0.76, "percentage": 37.91, "elapsed_time": "18:32:28", "remaining_time": "1 day, 6:22:20"} +{"current_steps": 10720, "total_steps": 28254, "loss": 0.7559, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.425354292991329e-05, "epoch": 0.76, "percentage": 37.94, "elapsed_time": "18:33:30", "remaining_time": "1 day, 6:21:18"} +{"current_steps": 10730, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4227713774196415e-05, "epoch": 0.76, "percentage": 37.98, "elapsed_time": "18:34:33", "remaining_time": "1 day, 6:20:16"} +{"current_steps": 10740, "total_steps": 28254, "loss": 0.7245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4201873209838e-05, "epoch": 0.76, "percentage": 38.01, "elapsed_time": "18:35:35", "remaining_time": "1 day, 6:19:13"} +{"current_steps": 10750, "total_steps": 28254, "loss": 0.7257, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.417602126878593e-05, "epoch": 0.76, "percentage": 38.05, "elapsed_time": "18:36:36", "remaining_time": "1 day, 6:18:08"} +{"current_steps": 10760, "total_steps": 28254, "loss": 0.7327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.415015798300214e-05, "epoch": 0.76, "percentage": 38.08, "elapsed_time": "18:37:40", "remaining_time": "1 day, 6:17:09"} +{"current_steps": 10770, "total_steps": 28254, "loss": 0.7503, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.412428338446257e-05, "epoch": 0.76, "percentage": 38.12, "elapsed_time": "18:38:42", "remaining_time": "1 day, 6:16:06"} +{"current_steps": 10780, "total_steps": 28254, "loss": 0.7504, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.409839750515717e-05, "epoch": 0.76, "percentage": 38.15, "elapsed_time": "18:39:45", "remaining_time": "1 day, 6:15:05"} +{"current_steps": 10790, "total_steps": 28254, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.407250037708982e-05, "epoch": 0.76, "percentage": 38.19, "elapsed_time": "18:40:46", "remaining_time": "1 day, 6:14:01"} +{"current_steps": 10800, "total_steps": 28254, "loss": 0.7614, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.404659203227832e-05, "epoch": 0.76, "percentage": 38.22, "elapsed_time": "18:41:51", "remaining_time": "1 day, 6:13:02"} +{"current_steps": 10810, "total_steps": 28254, "loss": 0.7691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4020672502754333e-05, "epoch": 0.77, "percentage": 38.26, "elapsed_time": "18:42:53", "remaining_time": "1 day, 6:12:00"} +{"current_steps": 10820, "total_steps": 28254, "loss": 0.7403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3994741820563344e-05, "epoch": 0.77, "percentage": 38.3, "elapsed_time": "18:43:57", "remaining_time": "1 day, 6:11:00"} +{"current_steps": 10830, "total_steps": 28254, "loss": 0.7404, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3968800017764645e-05, "epoch": 0.77, "percentage": 38.33, "elapsed_time": "18:44:57", "remaining_time": "1 day, 6:09:55"} +{"current_steps": 10840, "total_steps": 28254, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.394284712643126e-05, "epoch": 0.77, "percentage": 38.37, "elapsed_time": "18:46:01", "remaining_time": "1 day, 6:08:55"} +{"current_steps": 10850, "total_steps": 28254, "loss": 0.7452, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.391688317864992e-05, "epoch": 0.77, "percentage": 38.4, "elapsed_time": "18:47:05", "remaining_time": "1 day, 6:07:55"} +{"current_steps": 10860, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.389090820652104e-05, "epoch": 0.77, "percentage": 38.44, "elapsed_time": "18:48:08", "remaining_time": "1 day, 6:06:53"} +{"current_steps": 10870, "total_steps": 28254, "loss": 0.7231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.386492224215865e-05, "epoch": 0.77, "percentage": 38.47, "elapsed_time": "18:49:10", "remaining_time": "1 day, 6:05:50"} +{"current_steps": 10880, "total_steps": 28254, "loss": 0.7617, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.383892531769039e-05, "epoch": 0.77, "percentage": 38.51, "elapsed_time": "18:50:15", "remaining_time": "1 day, 6:04:53"} +{"current_steps": 10890, "total_steps": 28254, "loss": 0.7573, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.381291746525742e-05, "epoch": 0.77, "percentage": 38.54, "elapsed_time": "18:51:19", "remaining_time": "1 day, 6:03:52"} +{"current_steps": 10900, "total_steps": 28254, "loss": 0.7483, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.378689871701445e-05, "epoch": 0.77, "percentage": 38.58, "elapsed_time": "18:52:21", "remaining_time": "1 day, 6:02:50"} +{"current_steps": 10910, "total_steps": 28254, "loss": 0.742, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.376086910512962e-05, "epoch": 0.77, "percentage": 38.61, "elapsed_time": "18:53:22", "remaining_time": "1 day, 6:01:45"} +{"current_steps": 10920, "total_steps": 28254, "loss": 0.7302, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3734828661784535e-05, "epoch": 0.77, "percentage": 38.65, "elapsed_time": "18:54:24", "remaining_time": "1 day, 6:00:43"} +{"current_steps": 10930, "total_steps": 28254, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.370877741917418e-05, "epoch": 0.77, "percentage": 38.68, "elapsed_time": "18:55:26", "remaining_time": "1 day, 5:59:39"} +{"current_steps": 10940, "total_steps": 28254, "loss": 0.7196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.368271540950687e-05, "epoch": 0.77, "percentage": 38.72, "elapsed_time": "18:56:27", "remaining_time": "1 day, 5:58:35"} +{"current_steps": 10950, "total_steps": 28254, "loss": 0.7372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.365664266500426e-05, "epoch": 0.78, "percentage": 38.76, "elapsed_time": "18:57:32", "remaining_time": "1 day, 5:57:37"} +{"current_steps": 10960, "total_steps": 28254, "loss": 0.768, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.363055921790128e-05, "epoch": 0.78, "percentage": 38.79, "elapsed_time": "18:58:33", "remaining_time": "1 day, 5:56:33"} +{"current_steps": 10970, "total_steps": 28254, "loss": 0.7356, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3604465100446064e-05, "epoch": 0.78, "percentage": 38.83, "elapsed_time": "18:59:36", "remaining_time": "1 day, 5:55:31"} +{"current_steps": 10980, "total_steps": 28254, "loss": 0.7345, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3578360344899965e-05, "epoch": 0.78, "percentage": 38.86, "elapsed_time": "19:00:39", "remaining_time": "1 day, 5:54:30"} +{"current_steps": 10990, "total_steps": 28254, "loss": 0.708, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.355224498353747e-05, "epoch": 0.78, "percentage": 38.9, "elapsed_time": "19:01:40", "remaining_time": "1 day, 5:53:26"} +{"current_steps": 11000, "total_steps": 28254, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3526119048646196e-05, "epoch": 0.78, "percentage": 38.93, "elapsed_time": "19:02:43", "remaining_time": "1 day, 5:52:25"} +{"current_steps": 11010, "total_steps": 28254, "loss": 0.7346, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.349998257252681e-05, "epoch": 0.78, "percentage": 38.97, "elapsed_time": "19:03:45", "remaining_time": "1 day, 5:51:22"} +{"current_steps": 11020, "total_steps": 28254, "loss": 0.7535, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.347383558749303e-05, "epoch": 0.78, "percentage": 39.0, "elapsed_time": "19:04:47", "remaining_time": "1 day, 5:50:19"} +{"current_steps": 11030, "total_steps": 28254, "loss": 0.7271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.344767812587157e-05, "epoch": 0.78, "percentage": 39.04, "elapsed_time": "19:05:49", "remaining_time": "1 day, 5:49:16"} +{"current_steps": 11040, "total_steps": 28254, "loss": 0.7259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.342151022000207e-05, "epoch": 0.78, "percentage": 39.07, "elapsed_time": "19:06:50", "remaining_time": "1 day, 5:48:12"} +{"current_steps": 11050, "total_steps": 28254, "loss": 0.7319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.339533190223711e-05, "epoch": 0.78, "percentage": 39.11, "elapsed_time": "19:07:52", "remaining_time": "1 day, 5:47:09"} +{"current_steps": 11060, "total_steps": 28254, "loss": 0.7324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3369143204942125e-05, "epoch": 0.78, "percentage": 39.14, "elapsed_time": "19:08:55", "remaining_time": "1 day, 5:46:08"} +{"current_steps": 11070, "total_steps": 28254, "loss": 0.7375, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3342944160495406e-05, "epoch": 0.78, "percentage": 39.18, "elapsed_time": "19:09:56", "remaining_time": "1 day, 5:45:03"} +{"current_steps": 11080, "total_steps": 28254, "loss": 0.7354, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.331673480128801e-05, "epoch": 0.78, "percentage": 39.22, "elapsed_time": "19:11:00", "remaining_time": "1 day, 5:44:03"} +{"current_steps": 11090, "total_steps": 28254, "loss": 0.7361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.329051515972376e-05, "epoch": 0.78, "percentage": 39.25, "elapsed_time": "19:12:03", "remaining_time": "1 day, 5:43:02"} +{"current_steps": 11100, "total_steps": 28254, "loss": 0.7464, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.326428526821919e-05, "epoch": 0.79, "percentage": 39.29, "elapsed_time": "19:13:05", "remaining_time": "1 day, 5:42:00"} +{"current_steps": 11110, "total_steps": 28254, "loss": 0.7313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3238045159203494e-05, "epoch": 0.79, "percentage": 39.32, "elapsed_time": "19:14:07", "remaining_time": "1 day, 5:40:56"} +{"current_steps": 11120, "total_steps": 28254, "loss": 0.7223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.321179486511853e-05, "epoch": 0.79, "percentage": 39.36, "elapsed_time": "19:15:08", "remaining_time": "1 day, 5:39:51"} +{"current_steps": 11130, "total_steps": 28254, "loss": 0.7402, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.318553441841872e-05, "epoch": 0.79, "percentage": 39.39, "elapsed_time": "19:16:08", "remaining_time": "1 day, 5:38:47"} +{"current_steps": 11140, "total_steps": 28254, "loss": 0.7253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.315926385157105e-05, "epoch": 0.79, "percentage": 39.43, "elapsed_time": "19:17:12", "remaining_time": "1 day, 5:37:46"} +{"current_steps": 11150, "total_steps": 28254, "loss": 0.726, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.313298319705501e-05, "epoch": 0.79, "percentage": 39.46, "elapsed_time": "19:18:13", "remaining_time": "1 day, 5:36:42"} +{"current_steps": 11160, "total_steps": 28254, "loss": 0.7543, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3106692487362555e-05, "epoch": 0.79, "percentage": 39.5, "elapsed_time": "19:19:17", "remaining_time": "1 day, 5:35:41"} +{"current_steps": 11170, "total_steps": 28254, "loss": 0.728, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3080391754998106e-05, "epoch": 0.79, "percentage": 39.53, "elapsed_time": "19:20:18", "remaining_time": "1 day, 5:34:38"} +{"current_steps": 11180, "total_steps": 28254, "loss": 0.7323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.305408103247845e-05, "epoch": 0.79, "percentage": 39.57, "elapsed_time": "19:21:20", "remaining_time": "1 day, 5:33:35"} +{"current_steps": 11190, "total_steps": 28254, "loss": 0.7665, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3027760352332705e-05, "epoch": 0.79, "percentage": 39.61, "elapsed_time": "19:22:21", "remaining_time": "1 day, 5:32:31"} +{"current_steps": 11200, "total_steps": 28254, "loss": 0.7486, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.300142974710234e-05, "epoch": 0.79, "percentage": 39.64, "elapsed_time": "19:23:23", "remaining_time": "1 day, 5:31:28"} +{"current_steps": 11210, "total_steps": 28254, "loss": 0.7451, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.297508924934108e-05, "epoch": 0.79, "percentage": 39.68, "elapsed_time": "19:24:27", "remaining_time": "1 day, 5:30:29"} +{"current_steps": 11220, "total_steps": 28254, "loss": 0.7647, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2948738891614876e-05, "epoch": 0.79, "percentage": 39.71, "elapsed_time": "19:25:29", "remaining_time": "1 day, 5:29:24"} +{"current_steps": 11230, "total_steps": 28254, "loss": 0.7415, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.292237870650187e-05, "epoch": 0.79, "percentage": 39.75, "elapsed_time": "19:26:32", "remaining_time": "1 day, 5:28:24"} +{"current_steps": 11240, "total_steps": 28254, "loss": 0.746, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.289600872659235e-05, "epoch": 0.8, "percentage": 39.78, "elapsed_time": "19:27:33", "remaining_time": "1 day, 5:27:20"} +{"current_steps": 11250, "total_steps": 28254, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.286962898448873e-05, "epoch": 0.8, "percentage": 39.82, "elapsed_time": "19:28:35", "remaining_time": "1 day, 5:26:16"} +{"current_steps": 11260, "total_steps": 28254, "loss": 0.745, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.284323951280547e-05, "epoch": 0.8, "percentage": 39.85, "elapsed_time": "19:29:37", "remaining_time": "1 day, 5:25:14"} +{"current_steps": 11270, "total_steps": 28254, "loss": 0.7154, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.281684034416909e-05, "epoch": 0.8, "percentage": 39.89, "elapsed_time": "19:30:40", "remaining_time": "1 day, 5:24:13"} +{"current_steps": 11280, "total_steps": 28254, "loss": 0.7422, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2790431511218064e-05, "epoch": 0.8, "percentage": 39.92, "elapsed_time": "19:31:46", "remaining_time": "1 day, 5:23:16"} +{"current_steps": 11290, "total_steps": 28254, "loss": 0.7168, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.276401304660284e-05, "epoch": 0.8, "percentage": 39.96, "elapsed_time": "19:32:50", "remaining_time": "1 day, 5:22:17"} +{"current_steps": 11300, "total_steps": 28254, "loss": 0.7441, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2737584982985766e-05, "epoch": 0.8, "percentage": 39.99, "elapsed_time": "19:33:52", "remaining_time": "1 day, 5:21:13"} +{"current_steps": 11310, "total_steps": 28254, "loss": 0.7541, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.271114735304105e-05, "epoch": 0.8, "percentage": 40.03, "elapsed_time": "19:34:54", "remaining_time": "1 day, 5:20:10"} +{"current_steps": 11320, "total_steps": 28254, "loss": 0.7001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2684700189454744e-05, "epoch": 0.8, "percentage": 40.07, "elapsed_time": "19:35:56", "remaining_time": "1 day, 5:19:08"} +{"current_steps": 11330, "total_steps": 28254, "loss": 0.7379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.265824352492467e-05, "epoch": 0.8, "percentage": 40.1, "elapsed_time": "19:36:59", "remaining_time": "1 day, 5:18:06"} +{"current_steps": 11340, "total_steps": 28254, "loss": 0.72, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2631777392160403e-05, "epoch": 0.8, "percentage": 40.14, "elapsed_time": "19:38:00", "remaining_time": "1 day, 5:17:02"} +{"current_steps": 11350, "total_steps": 28254, "loss": 0.7386, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2605301823883226e-05, "epoch": 0.8, "percentage": 40.17, "elapsed_time": "19:39:04", "remaining_time": "1 day, 5:16:03"} +{"current_steps": 11360, "total_steps": 28254, "loss": 0.7074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.257881685282609e-05, "epoch": 0.8, "percentage": 40.21, "elapsed_time": "19:40:06", "remaining_time": "1 day, 5:15:00"} +{"current_steps": 11370, "total_steps": 28254, "loss": 0.7308, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.255232251173357e-05, "epoch": 0.8, "percentage": 40.24, "elapsed_time": "19:41:08", "remaining_time": "1 day, 5:13:56"} +{"current_steps": 11380, "total_steps": 28254, "loss": 0.7069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.252581883336181e-05, "epoch": 0.81, "percentage": 40.28, "elapsed_time": "19:42:11", "remaining_time": "1 day, 5:12:56"} +{"current_steps": 11390, "total_steps": 28254, "loss": 0.7334, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.249930585047852e-05, "epoch": 0.81, "percentage": 40.31, "elapsed_time": "19:43:15", "remaining_time": "1 day, 5:11:55"} +{"current_steps": 11400, "total_steps": 28254, "loss": 0.7444, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2472783595862896e-05, "epoch": 0.81, "percentage": 40.35, "elapsed_time": "19:44:18", "remaining_time": "1 day, 5:10:53"} +{"current_steps": 11410, "total_steps": 28254, "loss": 0.7503, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2446252102305625e-05, "epoch": 0.81, "percentage": 40.38, "elapsed_time": "19:45:20", "remaining_time": "1 day, 5:09:51"} +{"current_steps": 11420, "total_steps": 28254, "loss": 0.7331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2419711402608774e-05, "epoch": 0.81, "percentage": 40.42, "elapsed_time": "19:46:22", "remaining_time": "1 day, 5:08:48"} +{"current_steps": 11430, "total_steps": 28254, "loss": 0.7449, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2393161529585836e-05, "epoch": 0.81, "percentage": 40.45, "elapsed_time": "19:47:25", "remaining_time": "1 day, 5:07:46"} +{"current_steps": 11440, "total_steps": 28254, "loss": 0.7125, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.236660251606161e-05, "epoch": 0.81, "percentage": 40.49, "elapsed_time": "19:48:26", "remaining_time": "1 day, 5:06:43"} +{"current_steps": 11450, "total_steps": 28254, "loss": 0.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2340034394872217e-05, "epoch": 0.81, "percentage": 40.53, "elapsed_time": "19:49:27", "remaining_time": "1 day, 5:05:39"} +{"current_steps": 11460, "total_steps": 28254, "loss": 0.7293, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.231345719886502e-05, "epoch": 0.81, "percentage": 40.56, "elapsed_time": "19:50:28", "remaining_time": "1 day, 5:04:34"} +{"current_steps": 11470, "total_steps": 28254, "loss": 0.7301, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.228687096089863e-05, "epoch": 0.81, "percentage": 40.6, "elapsed_time": "19:51:29", "remaining_time": "1 day, 5:03:30"} +{"current_steps": 11480, "total_steps": 28254, "loss": 0.7094, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.226027571384281e-05, "epoch": 0.81, "percentage": 40.63, "elapsed_time": "19:52:32", "remaining_time": "1 day, 5:02:28"} +{"current_steps": 11490, "total_steps": 28254, "loss": 0.7153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2233671490578474e-05, "epoch": 0.81, "percentage": 40.67, "elapsed_time": "19:53:35", "remaining_time": "1 day, 5:01:27"} +{"current_steps": 11500, "total_steps": 28254, "loss": 0.7271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.220705832399763e-05, "epoch": 0.81, "percentage": 40.7, "elapsed_time": "19:54:39", "remaining_time": "1 day, 5:00:27"} +{"current_steps": 11510, "total_steps": 28254, "loss": 0.731, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.218043624700335e-05, "epoch": 0.81, "percentage": 40.74, "elapsed_time": "19:55:40", "remaining_time": "1 day, 4:59:22"} +{"current_steps": 11520, "total_steps": 28254, "loss": 0.7227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.215380529250971e-05, "epoch": 0.82, "percentage": 40.77, "elapsed_time": "19:56:43", "remaining_time": "1 day, 4:58:22"} +{"current_steps": 11530, "total_steps": 28254, "loss": 0.7455, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.212716549344177e-05, "epoch": 0.82, "percentage": 40.81, "elapsed_time": "19:57:46", "remaining_time": "1 day, 4:57:20"} +{"current_steps": 11540, "total_steps": 28254, "loss": 0.7609, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.210051688273552e-05, "epoch": 0.82, "percentage": 40.84, "elapsed_time": "19:58:47", "remaining_time": "1 day, 4:56:16"} +{"current_steps": 11550, "total_steps": 28254, "loss": 0.7306, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.207385949333785e-05, "epoch": 0.82, "percentage": 40.88, "elapsed_time": "19:59:48", "remaining_time": "1 day, 4:55:12"} +{"current_steps": 11560, "total_steps": 28254, "loss": 0.7132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.204719335820651e-05, "epoch": 0.82, "percentage": 40.91, "elapsed_time": "20:00:51", "remaining_time": "1 day, 4:54:10"} +{"current_steps": 11570, "total_steps": 28254, "loss": 0.735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.202051851031004e-05, "epoch": 0.82, "percentage": 40.95, "elapsed_time": "20:01:53", "remaining_time": "1 day, 4:53:07"} +{"current_steps": 11580, "total_steps": 28254, "loss": 0.7182, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.199383498262777e-05, "epoch": 0.82, "percentage": 40.99, "elapsed_time": "20:02:55", "remaining_time": "1 day, 4:52:05"} +{"current_steps": 11590, "total_steps": 28254, "loss": 0.7235, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.196714280814976e-05, "epoch": 0.82, "percentage": 41.02, "elapsed_time": "20:03:57", "remaining_time": "1 day, 4:51:02"} +{"current_steps": 11600, "total_steps": 28254, "loss": 0.7094, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.194044201987675e-05, "epoch": 0.82, "percentage": 41.06, "elapsed_time": "20:04:59", "remaining_time": "1 day, 4:49:59"} +{"current_steps": 11610, "total_steps": 28254, "loss": 0.7078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.191373265082015e-05, "epoch": 0.82, "percentage": 41.09, "elapsed_time": "20:06:02", "remaining_time": "1 day, 4:48:57"} +{"current_steps": 11620, "total_steps": 28254, "loss": 0.7232, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.188701473400195e-05, "epoch": 0.82, "percentage": 41.13, "elapsed_time": "20:07:07", "remaining_time": "1 day, 4:47:59"} +{"current_steps": 11630, "total_steps": 28254, "loss": 0.7361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1860288302454735e-05, "epoch": 0.82, "percentage": 41.16, "elapsed_time": "20:08:09", "remaining_time": "1 day, 4:46:56"} +{"current_steps": 11640, "total_steps": 28254, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.18335533892216e-05, "epoch": 0.82, "percentage": 41.2, "elapsed_time": "20:09:13", "remaining_time": "1 day, 4:45:57"} +{"current_steps": 11650, "total_steps": 28254, "loss": 0.7403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.180681002735614e-05, "epoch": 0.82, "percentage": 41.23, "elapsed_time": "20:10:14", "remaining_time": "1 day, 4:44:53"} +{"current_steps": 11660, "total_steps": 28254, "loss": 0.7395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.178005824992237e-05, "epoch": 0.83, "percentage": 41.27, "elapsed_time": "20:11:17", "remaining_time": "1 day, 4:43:51"} +{"current_steps": 11670, "total_steps": 28254, "loss": 0.738, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.175329808999475e-05, "epoch": 0.83, "percentage": 41.3, "elapsed_time": "20:12:19", "remaining_time": "1 day, 4:42:48"} +{"current_steps": 11680, "total_steps": 28254, "loss": 0.7386, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.172652958065806e-05, "epoch": 0.83, "percentage": 41.34, "elapsed_time": "20:13:22", "remaining_time": "1 day, 4:41:47"} +{"current_steps": 11690, "total_steps": 28254, "loss": 0.6953, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.169975275500743e-05, "epoch": 0.83, "percentage": 41.37, "elapsed_time": "20:14:25", "remaining_time": "1 day, 4:40:45"} +{"current_steps": 11700, "total_steps": 28254, "loss": 0.7369, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1672967646148285e-05, "epoch": 0.83, "percentage": 41.41, "elapsed_time": "20:15:27", "remaining_time": "1 day, 4:39:43"} +{"current_steps": 11710, "total_steps": 28254, "loss": 0.737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.164617428719624e-05, "epoch": 0.83, "percentage": 41.45, "elapsed_time": "20:16:30", "remaining_time": "1 day, 4:38:41"} +{"current_steps": 11720, "total_steps": 28254, "loss": 0.7133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.161937271127717e-05, "epoch": 0.83, "percentage": 41.48, "elapsed_time": "20:17:32", "remaining_time": "1 day, 4:37:38"} +{"current_steps": 11730, "total_steps": 28254, "loss": 0.7289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.159256295152705e-05, "epoch": 0.83, "percentage": 41.52, "elapsed_time": "20:18:32", "remaining_time": "1 day, 4:36:33"} +{"current_steps": 11740, "total_steps": 28254, "loss": 0.7018, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.156574504109203e-05, "epoch": 0.83, "percentage": 41.55, "elapsed_time": "20:19:34", "remaining_time": "1 day, 4:35:30"} +{"current_steps": 11750, "total_steps": 28254, "loss": 0.7293, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1538919013128295e-05, "epoch": 0.83, "percentage": 41.59, "elapsed_time": "20:20:37", "remaining_time": "1 day, 4:34:29"} +{"current_steps": 11760, "total_steps": 28254, "loss": 0.7382, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.151208490080209e-05, "epoch": 0.83, "percentage": 41.62, "elapsed_time": "20:21:41", "remaining_time": "1 day, 4:33:29"} +{"current_steps": 11770, "total_steps": 28254, "loss": 0.7483, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.148524273728964e-05, "epoch": 0.83, "percentage": 41.66, "elapsed_time": "20:22:46", "remaining_time": "1 day, 4:32:30"} +{"current_steps": 11780, "total_steps": 28254, "loss": 0.7483, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.145839255577714e-05, "epoch": 0.83, "percentage": 41.69, "elapsed_time": "20:23:48", "remaining_time": "1 day, 4:31:27"} +{"current_steps": 11790, "total_steps": 28254, "loss": 0.7278, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1431534389460665e-05, "epoch": 0.83, "percentage": 41.73, "elapsed_time": "20:24:50", "remaining_time": "1 day, 4:30:24"} +{"current_steps": 11800, "total_steps": 28254, "loss": 0.7551, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.140466827154622e-05, "epoch": 0.84, "percentage": 41.76, "elapsed_time": "20:25:53", "remaining_time": "1 day, 4:29:23"} +{"current_steps": 11810, "total_steps": 28254, "loss": 0.7652, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.137779423524958e-05, "epoch": 0.84, "percentage": 41.8, "elapsed_time": "20:26:56", "remaining_time": "1 day, 4:28:21"} +{"current_steps": 11820, "total_steps": 28254, "loss": 0.7296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1350912313796336e-05, "epoch": 0.84, "percentage": 41.83, "elapsed_time": "20:27:58", "remaining_time": "1 day, 4:27:19"} +{"current_steps": 11830, "total_steps": 28254, "loss": 0.722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.132402254042185e-05, "epoch": 0.84, "percentage": 41.87, "elapsed_time": "20:29:00", "remaining_time": "1 day, 4:26:15"} +{"current_steps": 11840, "total_steps": 28254, "loss": 0.6992, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.129712494837115e-05, "epoch": 0.84, "percentage": 41.91, "elapsed_time": "20:30:01", "remaining_time": "1 day, 4:25:11"} +{"current_steps": 11850, "total_steps": 28254, "loss": 0.7204, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.127021957089896e-05, "epoch": 0.84, "percentage": 41.94, "elapsed_time": "20:31:03", "remaining_time": "1 day, 4:24:09"} +{"current_steps": 11860, "total_steps": 28254, "loss": 0.7393, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.124330644126962e-05, "epoch": 0.84, "percentage": 41.98, "elapsed_time": "20:32:07", "remaining_time": "1 day, 4:23:08"} +{"current_steps": 11870, "total_steps": 28254, "loss": 0.7287, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1216385592757045e-05, "epoch": 0.84, "percentage": 42.01, "elapsed_time": "20:33:08", "remaining_time": "1 day, 4:22:05"} +{"current_steps": 11880, "total_steps": 28254, "loss": 0.7548, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.118945705864471e-05, "epoch": 0.84, "percentage": 42.05, "elapsed_time": "20:34:11", "remaining_time": "1 day, 4:21:04"} +{"current_steps": 11890, "total_steps": 28254, "loss": 0.7513, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1162520872225584e-05, "epoch": 0.84, "percentage": 42.08, "elapsed_time": "20:35:13", "remaining_time": "1 day, 4:20:00"} +{"current_steps": 11900, "total_steps": 28254, "loss": 0.724, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.11355770668021e-05, "epoch": 0.84, "percentage": 42.12, "elapsed_time": "20:36:14", "remaining_time": "1 day, 4:18:57"} +{"current_steps": 11910, "total_steps": 28254, "loss": 0.7224, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.11086256756861e-05, "epoch": 0.84, "percentage": 42.15, "elapsed_time": "20:37:17", "remaining_time": "1 day, 4:17:55"} +{"current_steps": 11920, "total_steps": 28254, "loss": 0.7403, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1081666732198805e-05, "epoch": 0.84, "percentage": 42.19, "elapsed_time": "20:38:22", "remaining_time": "1 day, 4:16:56"} +{"current_steps": 11930, "total_steps": 28254, "loss": 0.7338, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1054700269670814e-05, "epoch": 0.84, "percentage": 42.22, "elapsed_time": "20:39:24", "remaining_time": "1 day, 4:15:53"} +{"current_steps": 11940, "total_steps": 28254, "loss": 0.69, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.102772632144195e-05, "epoch": 0.85, "percentage": 42.26, "elapsed_time": "20:40:27", "remaining_time": "1 day, 4:14:52"} +{"current_steps": 11950, "total_steps": 28254, "loss": 0.725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.100074492086136e-05, "epoch": 0.85, "percentage": 42.29, "elapsed_time": "20:41:30", "remaining_time": "1 day, 4:13:50"} +{"current_steps": 11960, "total_steps": 28254, "loss": 0.7465, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0973756101287344e-05, "epoch": 0.85, "percentage": 42.33, "elapsed_time": "20:42:32", "remaining_time": "1 day, 4:12:49"} +{"current_steps": 11970, "total_steps": 28254, "loss": 0.7249, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.094675989608744e-05, "epoch": 0.85, "percentage": 42.37, "elapsed_time": "20:43:38", "remaining_time": "1 day, 4:11:50"} +{"current_steps": 11980, "total_steps": 28254, "loss": 0.7192, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.091975633863826e-05, "epoch": 0.85, "percentage": 42.4, "elapsed_time": "20:44:39", "remaining_time": "1 day, 4:10:47"} +{"current_steps": 11990, "total_steps": 28254, "loss": 0.7273, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.089274546232554e-05, "epoch": 0.85, "percentage": 42.44, "elapsed_time": "20:45:44", "remaining_time": "1 day, 4:09:48"} +{"current_steps": 12000, "total_steps": 28254, "loss": 0.7629, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0865727300544026e-05, "epoch": 0.85, "percentage": 42.47, "elapsed_time": "20:46:46", "remaining_time": "1 day, 4:08:45"} +{"current_steps": 12010, "total_steps": 28254, "loss": 0.731, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.083870188669754e-05, "epoch": 0.85, "percentage": 42.51, "elapsed_time": "20:47:49", "remaining_time": "1 day, 4:07:44"} +{"current_steps": 12020, "total_steps": 28254, "loss": 0.7557, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.081166925419879e-05, "epoch": 0.85, "percentage": 42.54, "elapsed_time": "20:48:51", "remaining_time": "1 day, 4:06:41"} +{"current_steps": 12030, "total_steps": 28254, "loss": 0.7376, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.078462943646949e-05, "epoch": 0.85, "percentage": 42.58, "elapsed_time": "20:49:55", "remaining_time": "1 day, 4:05:41"} +{"current_steps": 12040, "total_steps": 28254, "loss": 0.74, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0757582466940135e-05, "epoch": 0.85, "percentage": 42.61, "elapsed_time": "20:50:56", "remaining_time": "1 day, 4:04:36"} +{"current_steps": 12050, "total_steps": 28254, "loss": 0.7296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.073052837905018e-05, "epoch": 0.85, "percentage": 42.65, "elapsed_time": "20:51:59", "remaining_time": "1 day, 4:03:35"} +{"current_steps": 12060, "total_steps": 28254, "loss": 0.7117, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0703467206247784e-05, "epoch": 0.85, "percentage": 42.68, "elapsed_time": "20:53:01", "remaining_time": "1 day, 4:02:32"} +{"current_steps": 12070, "total_steps": 28254, "loss": 0.7598, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.067639898198992e-05, "epoch": 0.85, "percentage": 42.72, "elapsed_time": "20:54:03", "remaining_time": "1 day, 4:01:30"} +{"current_steps": 12080, "total_steps": 28254, "loss": 0.7447, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.064932373974225e-05, "epoch": 0.86, "percentage": 42.76, "elapsed_time": "20:55:07", "remaining_time": "1 day, 4:00:29"} +{"current_steps": 12090, "total_steps": 28254, "loss": 0.7414, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.062224151297915e-05, "epoch": 0.86, "percentage": 42.79, "elapsed_time": "20:56:10", "remaining_time": "1 day, 3:59:28"} +{"current_steps": 12100, "total_steps": 28254, "loss": 0.7199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.059515233518358e-05, "epoch": 0.86, "percentage": 42.83, "elapsed_time": "20:57:13", "remaining_time": "1 day, 3:58:26"} +{"current_steps": 12110, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.056805623984714e-05, "epoch": 0.86, "percentage": 42.86, "elapsed_time": "20:58:16", "remaining_time": "1 day, 3:57:25"} +{"current_steps": 12120, "total_steps": 28254, "loss": 0.7223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0540953260469945e-05, "epoch": 0.86, "percentage": 42.9, "elapsed_time": "20:59:18", "remaining_time": "1 day, 3:56:22"} +{"current_steps": 12130, "total_steps": 28254, "loss": 0.7383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0513843430560657e-05, "epoch": 0.86, "percentage": 42.93, "elapsed_time": "21:00:21", "remaining_time": "1 day, 3:55:21"} +{"current_steps": 12140, "total_steps": 28254, "loss": 0.741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0486726783636375e-05, "epoch": 0.86, "percentage": 42.97, "elapsed_time": "21:01:23", "remaining_time": "1 day, 3:54:18"} +{"current_steps": 12150, "total_steps": 28254, "loss": 0.7246, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0459603353222643e-05, "epoch": 0.86, "percentage": 43.0, "elapsed_time": "21:02:26", "remaining_time": "1 day, 3:53:16"} +{"current_steps": 12160, "total_steps": 28254, "loss": 0.7158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0432473172853404e-05, "epoch": 0.86, "percentage": 43.04, "elapsed_time": "21:03:29", "remaining_time": "1 day, 3:52:14"} +{"current_steps": 12170, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0405336276070918e-05, "epoch": 0.86, "percentage": 43.07, "elapsed_time": "21:04:31", "remaining_time": "1 day, 3:51:12"} +{"current_steps": 12180, "total_steps": 28254, "loss": 0.7204, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0378192696425768e-05, "epoch": 0.86, "percentage": 43.11, "elapsed_time": "21:05:31", "remaining_time": "1 day, 3:50:07"} +{"current_steps": 12190, "total_steps": 28254, "loss": 0.7198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0351042467476782e-05, "epoch": 0.86, "percentage": 43.14, "elapsed_time": "21:06:34", "remaining_time": "1 day, 3:49:05"} +{"current_steps": 12200, "total_steps": 28254, "loss": 0.7504, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0323885622791042e-05, "epoch": 0.86, "percentage": 43.18, "elapsed_time": "21:07:38", "remaining_time": "1 day, 3:48:05"} +{"current_steps": 12210, "total_steps": 28254, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0296722195943767e-05, "epoch": 0.86, "percentage": 43.22, "elapsed_time": "21:08:39", "remaining_time": "1 day, 3:47:01"} +{"current_steps": 12220, "total_steps": 28254, "loss": 0.7328, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.026955222051836e-05, "epoch": 0.86, "percentage": 43.25, "elapsed_time": "21:09:41", "remaining_time": "1 day, 3:45:59"} +{"current_steps": 12230, "total_steps": 28254, "loss": 0.7178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0242375730106265e-05, "epoch": 0.87, "percentage": 43.29, "elapsed_time": "21:10:44", "remaining_time": "1 day, 3:44:57"} +{"current_steps": 12240, "total_steps": 28254, "loss": 0.7309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0215192758307032e-05, "epoch": 0.87, "percentage": 43.32, "elapsed_time": "21:11:46", "remaining_time": "1 day, 3:43:53"} +{"current_steps": 12250, "total_steps": 28254, "loss": 0.7368, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0188003338728192e-05, "epoch": 0.87, "percentage": 43.36, "elapsed_time": "21:12:48", "remaining_time": "1 day, 3:42:51"} +{"current_steps": 12260, "total_steps": 28254, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0160807504985278e-05, "epoch": 0.87, "percentage": 43.39, "elapsed_time": "21:13:50", "remaining_time": "1 day, 3:41:48"} +{"current_steps": 12270, "total_steps": 28254, "loss": 0.7489, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0133605290701707e-05, "epoch": 0.87, "percentage": 43.43, "elapsed_time": "21:14:53", "remaining_time": "1 day, 3:40:47"} +{"current_steps": 12280, "total_steps": 28254, "loss": 0.7134, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0106396729508836e-05, "epoch": 0.87, "percentage": 43.46, "elapsed_time": "21:15:56", "remaining_time": "1 day, 3:39:46"} +{"current_steps": 12290, "total_steps": 28254, "loss": 0.7012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0079181855045818e-05, "epoch": 0.87, "percentage": 43.5, "elapsed_time": "21:17:02", "remaining_time": "1 day, 3:38:47"} +{"current_steps": 12300, "total_steps": 28254, "loss": 0.7242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0051960700959663e-05, "epoch": 0.87, "percentage": 43.53, "elapsed_time": "21:18:04", "remaining_time": "1 day, 3:37:45"} +{"current_steps": 12310, "total_steps": 28254, "loss": 0.7115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.002473330090511e-05, "epoch": 0.87, "percentage": 43.57, "elapsed_time": "21:19:06", "remaining_time": "1 day, 3:36:42"} +{"current_steps": 12320, "total_steps": 28254, "loss": 0.7444, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.999749968854463e-05, "epoch": 0.87, "percentage": 43.6, "elapsed_time": "21:20:10", "remaining_time": "1 day, 3:35:41"} +{"current_steps": 12330, "total_steps": 28254, "loss": 0.7397, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9970259897548374e-05, "epoch": 0.87, "percentage": 43.64, "elapsed_time": "21:21:13", "remaining_time": "1 day, 3:34:41"} +{"current_steps": 12340, "total_steps": 28254, "loss": 0.7344, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9943013961594136e-05, "epoch": 0.87, "percentage": 43.68, "elapsed_time": "21:22:16", "remaining_time": "1 day, 3:33:39"} +{"current_steps": 12350, "total_steps": 28254, "loss": 0.7216, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9915761914367302e-05, "epoch": 0.87, "percentage": 43.71, "elapsed_time": "21:23:18", "remaining_time": "1 day, 3:32:36"} +{"current_steps": 12360, "total_steps": 28254, "loss": 0.7298, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9888503789560808e-05, "epoch": 0.87, "percentage": 43.75, "elapsed_time": "21:24:21", "remaining_time": "1 day, 3:31:35"} +{"current_steps": 12370, "total_steps": 28254, "loss": 0.7572, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.986123962087512e-05, "epoch": 0.88, "percentage": 43.78, "elapsed_time": "21:25:23", "remaining_time": "1 day, 3:30:32"} +{"current_steps": 12380, "total_steps": 28254, "loss": 0.7116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9833969442018168e-05, "epoch": 0.88, "percentage": 43.82, "elapsed_time": "21:26:26", "remaining_time": "1 day, 3:29:31"} +{"current_steps": 12390, "total_steps": 28254, "loss": 0.7127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9806693286705312e-05, "epoch": 0.88, "percentage": 43.85, "elapsed_time": "21:27:28", "remaining_time": "1 day, 3:28:28"} +{"current_steps": 12400, "total_steps": 28254, "loss": 0.7188, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.977941118865929e-05, "epoch": 0.88, "percentage": 43.89, "elapsed_time": "21:28:31", "remaining_time": "1 day, 3:27:26"} +{"current_steps": 12410, "total_steps": 28254, "loss": 0.7249, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9752123181610216e-05, "epoch": 0.88, "percentage": 43.92, "elapsed_time": "21:29:33", "remaining_time": "1 day, 3:26:23"} +{"current_steps": 12420, "total_steps": 28254, "loss": 0.722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9724829299295477e-05, "epoch": 0.88, "percentage": 43.96, "elapsed_time": "21:30:37", "remaining_time": "1 day, 3:25:23"} +{"current_steps": 12430, "total_steps": 28254, "loss": 0.7404, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9697529575459755e-05, "epoch": 0.88, "percentage": 43.99, "elapsed_time": "21:31:40", "remaining_time": "1 day, 3:24:22"} +{"current_steps": 12440, "total_steps": 28254, "loss": 0.719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9670224043854916e-05, "epoch": 0.88, "percentage": 44.03, "elapsed_time": "21:32:42", "remaining_time": "1 day, 3:23:19"} +{"current_steps": 12450, "total_steps": 28254, "loss": 0.7442, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9642912738240052e-05, "epoch": 0.88, "percentage": 44.06, "elapsed_time": "21:33:43", "remaining_time": "1 day, 3:22:14"} +{"current_steps": 12460, "total_steps": 28254, "loss": 0.7398, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9615595692381348e-05, "epoch": 0.88, "percentage": 44.1, "elapsed_time": "21:34:42", "remaining_time": "1 day, 3:21:08"} +{"current_steps": 12470, "total_steps": 28254, "loss": 0.7281, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.958827294005213e-05, "epoch": 0.88, "percentage": 44.14, "elapsed_time": "21:35:45", "remaining_time": "1 day, 3:20:06"} +{"current_steps": 12480, "total_steps": 28254, "loss": 0.721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.956094451503274e-05, "epoch": 0.88, "percentage": 44.17, "elapsed_time": "21:36:49", "remaining_time": "1 day, 3:19:06"} +{"current_steps": 12490, "total_steps": 28254, "loss": 0.7184, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9533610451110566e-05, "epoch": 0.88, "percentage": 44.21, "elapsed_time": "21:37:51", "remaining_time": "1 day, 3:18:04"} +{"current_steps": 12500, "total_steps": 28254, "loss": 0.719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9509005000249595e-05, "epoch": 0.88, "percentage": 44.24, "elapsed_time": "21:38:54", "remaining_time": "1 day, 3:17:02"} +{"current_steps": 12510, "total_steps": 28254, "loss": 0.7482, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.948166031552126e-05, "epoch": 0.89, "percentage": 44.28, "elapsed_time": "21:39:56", "remaining_time": "1 day, 3:15:59"} +{"current_steps": 12520, "total_steps": 28254, "loss": 0.7418, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9454310089912785e-05, "epoch": 0.89, "percentage": 44.31, "elapsed_time": "21:41:00", "remaining_time": "1 day, 3:14:59"} +{"current_steps": 12530, "total_steps": 28254, "loss": 0.7526, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9426954357238502e-05, "epoch": 0.89, "percentage": 44.35, "elapsed_time": "21:42:02", "remaining_time": "1 day, 3:13:56"} +{"current_steps": 12540, "total_steps": 28254, "loss": 0.725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.939959315131954e-05, "epoch": 0.89, "percentage": 44.38, "elapsed_time": "21:43:04", "remaining_time": "1 day, 3:12:53"} +{"current_steps": 12550, "total_steps": 28254, "loss": 0.7073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9372226505983802e-05, "epoch": 0.89, "percentage": 44.42, "elapsed_time": "21:44:07", "remaining_time": "1 day, 3:11:52"} +{"current_steps": 12560, "total_steps": 28254, "loss": 0.7359, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.934485445506591e-05, "epoch": 0.89, "percentage": 44.45, "elapsed_time": "21:45:09", "remaining_time": "1 day, 3:10:49"} +{"current_steps": 12570, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9317477032407188e-05, "epoch": 0.89, "percentage": 44.49, "elapsed_time": "21:46:12", "remaining_time": "1 day, 3:09:47"} +{"current_steps": 12580, "total_steps": 28254, "loss": 0.7015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9290094271855573e-05, "epoch": 0.89, "percentage": 44.52, "elapsed_time": "21:47:16", "remaining_time": "1 day, 3:08:47"} +{"current_steps": 12590, "total_steps": 28254, "loss": 0.6919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9262706207265618e-05, "epoch": 0.89, "percentage": 44.56, "elapsed_time": "21:48:18", "remaining_time": "1 day, 3:07:45"} +{"current_steps": 12600, "total_steps": 28254, "loss": 0.7245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.923531287249843e-05, "epoch": 0.89, "percentage": 44.6, "elapsed_time": "21:49:22", "remaining_time": "1 day, 3:06:44"} +{"current_steps": 12610, "total_steps": 28254, "loss": 0.7212, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9207914301421635e-05, "epoch": 0.89, "percentage": 44.63, "elapsed_time": "21:50:25", "remaining_time": "1 day, 3:05:43"} +{"current_steps": 12620, "total_steps": 28254, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9180510527909334e-05, "epoch": 0.89, "percentage": 44.67, "elapsed_time": "21:51:28", "remaining_time": "1 day, 3:04:41"} +{"current_steps": 12630, "total_steps": 28254, "loss": 0.7417, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.915310158584205e-05, "epoch": 0.89, "percentage": 44.7, "elapsed_time": "21:52:30", "remaining_time": "1 day, 3:03:39"} +{"current_steps": 12640, "total_steps": 28254, "loss": 0.7139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9125687509106702e-05, "epoch": 0.89, "percentage": 44.74, "elapsed_time": "21:53:32", "remaining_time": "1 day, 3:02:36"} +{"current_steps": 12650, "total_steps": 28254, "loss": 0.7098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9098268331596568e-05, "epoch": 0.9, "percentage": 44.77, "elapsed_time": "21:54:36", "remaining_time": "1 day, 3:01:35"} +{"current_steps": 12660, "total_steps": 28254, "loss": 0.7271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9070844087211207e-05, "epoch": 0.9, "percentage": 44.81, "elapsed_time": "21:55:39", "remaining_time": "1 day, 3:00:33"} +{"current_steps": 12670, "total_steps": 28254, "loss": 0.7086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9043414809856463e-05, "epoch": 0.9, "percentage": 44.84, "elapsed_time": "21:56:41", "remaining_time": "1 day, 2:59:31"} +{"current_steps": 12680, "total_steps": 28254, "loss": 0.7483, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.901598053344441e-05, "epoch": 0.9, "percentage": 44.88, "elapsed_time": "21:57:44", "remaining_time": "1 day, 2:58:29"} +{"current_steps": 12690, "total_steps": 28254, "loss": 0.7425, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8988541291893267e-05, "epoch": 0.9, "percentage": 44.91, "elapsed_time": "21:58:49", "remaining_time": "1 day, 2:57:31"} +{"current_steps": 12700, "total_steps": 28254, "loss": 0.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.896109711912744e-05, "epoch": 0.9, "percentage": 44.95, "elapsed_time": "21:59:55", "remaining_time": "1 day, 2:56:32"} +{"current_steps": 12710, "total_steps": 28254, "loss": 0.7443, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.893364804907738e-05, "epoch": 0.9, "percentage": 44.98, "elapsed_time": "22:00:59", "remaining_time": "1 day, 2:55:31"} +{"current_steps": 12720, "total_steps": 28254, "loss": 0.7383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.890619411567964e-05, "epoch": 0.9, "percentage": 45.02, "elapsed_time": "22:02:02", "remaining_time": "1 day, 2:54:30"} +{"current_steps": 12730, "total_steps": 28254, "loss": 0.7197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8878735352876746e-05, "epoch": 0.9, "percentage": 45.06, "elapsed_time": "22:03:04", "remaining_time": "1 day, 2:53:27"} +{"current_steps": 12740, "total_steps": 28254, "loss": 0.7102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.885127179461723e-05, "epoch": 0.9, "percentage": 45.09, "elapsed_time": "22:04:07", "remaining_time": "1 day, 2:52:26"} +{"current_steps": 12750, "total_steps": 28254, "loss": 0.7379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.882380347485552e-05, "epoch": 0.9, "percentage": 45.13, "elapsed_time": "22:05:10", "remaining_time": "1 day, 2:51:24"} +{"current_steps": 12760, "total_steps": 28254, "loss": 0.736, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8796330427551958e-05, "epoch": 0.9, "percentage": 45.16, "elapsed_time": "22:06:12", "remaining_time": "1 day, 2:50:22"} +{"current_steps": 12770, "total_steps": 28254, "loss": 0.7209, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.876885268667272e-05, "epoch": 0.9, "percentage": 45.2, "elapsed_time": "22:07:18", "remaining_time": "1 day, 2:49:24"} +{"current_steps": 12780, "total_steps": 28254, "loss": 0.7219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8741370286189783e-05, "epoch": 0.9, "percentage": 45.23, "elapsed_time": "22:08:21", "remaining_time": "1 day, 2:48:22"} +{"current_steps": 12790, "total_steps": 28254, "loss": 0.7205, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.871388326008088e-05, "epoch": 0.91, "percentage": 45.27, "elapsed_time": "22:09:26", "remaining_time": "1 day, 2:47:23"} +{"current_steps": 12800, "total_steps": 28254, "loss": 0.7213, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.868639164232948e-05, "epoch": 0.91, "percentage": 45.3, "elapsed_time": "22:10:31", "remaining_time": "1 day, 2:46:23"} +{"current_steps": 12810, "total_steps": 28254, "loss": 0.7205, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8658895466924707e-05, "epoch": 0.91, "percentage": 45.34, "elapsed_time": "22:11:33", "remaining_time": "1 day, 2:45:21"} +{"current_steps": 12820, "total_steps": 28254, "loss": 0.7313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8631394767861342e-05, "epoch": 0.91, "percentage": 45.37, "elapsed_time": "22:12:39", "remaining_time": "1 day, 2:44:23"} +{"current_steps": 12830, "total_steps": 28254, "loss": 0.7155, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8603889579139742e-05, "epoch": 0.91, "percentage": 45.41, "elapsed_time": "22:13:42", "remaining_time": "1 day, 2:43:21"} +{"current_steps": 12840, "total_steps": 28254, "loss": 0.7366, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8576379934765824e-05, "epoch": 0.91, "percentage": 45.44, "elapsed_time": "22:14:46", "remaining_time": "1 day, 2:42:21"} +{"current_steps": 12850, "total_steps": 28254, "loss": 0.7453, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8548865868751002e-05, "epoch": 0.91, "percentage": 45.48, "elapsed_time": "22:15:50", "remaining_time": "1 day, 2:41:20"} +{"current_steps": 12860, "total_steps": 28254, "loss": 0.7412, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8521347415112175e-05, "epoch": 0.91, "percentage": 45.52, "elapsed_time": "22:16:54", "remaining_time": "1 day, 2:40:20"} +{"current_steps": 12870, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.849382460787165e-05, "epoch": 0.91, "percentage": 45.55, "elapsed_time": "22:17:56", "remaining_time": "1 day, 2:39:17"} +{"current_steps": 12880, "total_steps": 28254, "loss": 0.7102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.846629748105713e-05, "epoch": 0.91, "percentage": 45.59, "elapsed_time": "22:19:00", "remaining_time": "1 day, 2:38:16"} +{"current_steps": 12890, "total_steps": 28254, "loss": 0.7158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8438766068701643e-05, "epoch": 0.91, "percentage": 45.62, "elapsed_time": "22:20:01", "remaining_time": "1 day, 2:37:13"} +{"current_steps": 12900, "total_steps": 28254, "loss": 0.7229, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.841123040484353e-05, "epoch": 0.91, "percentage": 45.66, "elapsed_time": "22:21:03", "remaining_time": "1 day, 2:36:10"} +{"current_steps": 12910, "total_steps": 28254, "loss": 0.7041, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8383690523526386e-05, "epoch": 0.91, "percentage": 45.69, "elapsed_time": "22:22:07", "remaining_time": "1 day, 2:35:09"} +{"current_steps": 12920, "total_steps": 28254, "loss": 0.7187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.835614645879901e-05, "epoch": 0.91, "percentage": 45.73, "elapsed_time": "22:23:09", "remaining_time": "1 day, 2:34:07"} +{"current_steps": 12930, "total_steps": 28254, "loss": 0.7469, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8328598244715377e-05, "epoch": 0.92, "percentage": 45.76, "elapsed_time": "22:24:14", "remaining_time": "1 day, 2:33:08"} +{"current_steps": 12940, "total_steps": 28254, "loss": 0.7331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8301045915334606e-05, "epoch": 0.92, "percentage": 45.8, "elapsed_time": "22:25:16", "remaining_time": "1 day, 2:32:05"} +{"current_steps": 12950, "total_steps": 28254, "loss": 0.7355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8273489504720885e-05, "epoch": 0.92, "percentage": 45.83, "elapsed_time": "22:26:19", "remaining_time": "1 day, 2:31:02"} +{"current_steps": 12960, "total_steps": 28254, "loss": 0.7355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8245929046943453e-05, "epoch": 0.92, "percentage": 45.87, "elapsed_time": "22:27:23", "remaining_time": "1 day, 2:30:02"} +{"current_steps": 12970, "total_steps": 28254, "loss": 0.7246, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8218364576076566e-05, "epoch": 0.92, "percentage": 45.91, "elapsed_time": "22:28:26", "remaining_time": "1 day, 2:29:00"} +{"current_steps": 12980, "total_steps": 28254, "loss": 0.7191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8190796126199415e-05, "epoch": 0.92, "percentage": 45.94, "elapsed_time": "22:29:27", "remaining_time": "1 day, 2:27:57"} +{"current_steps": 12990, "total_steps": 28254, "loss": 0.719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8163223731396143e-05, "epoch": 0.92, "percentage": 45.98, "elapsed_time": "22:30:31", "remaining_time": "1 day, 2:26:56"} +{"current_steps": 13000, "total_steps": 28254, "loss": 0.7296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.813564742575575e-05, "epoch": 0.92, "percentage": 46.01, "elapsed_time": "22:31:34", "remaining_time": "1 day, 2:25:54"} +{"current_steps": 13010, "total_steps": 28254, "loss": 0.7325, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8108067243372067e-05, "epoch": 0.92, "percentage": 46.05, "elapsed_time": "22:32:36", "remaining_time": "1 day, 2:24:52"} +{"current_steps": 13020, "total_steps": 28254, "loss": 0.7346, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.808048321834373e-05, "epoch": 0.92, "percentage": 46.08, "elapsed_time": "22:33:37", "remaining_time": "1 day, 2:23:48"} +{"current_steps": 13030, "total_steps": 28254, "loss": 0.7191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8052895384774125e-05, "epoch": 0.92, "percentage": 46.12, "elapsed_time": "22:34:39", "remaining_time": "1 day, 2:22:45"} +{"current_steps": 13040, "total_steps": 28254, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8025303776771333e-05, "epoch": 0.92, "percentage": 46.15, "elapsed_time": "22:35:42", "remaining_time": "1 day, 2:21:43"} +{"current_steps": 13050, "total_steps": 28254, "loss": 0.7196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7997708428448126e-05, "epoch": 0.92, "percentage": 46.19, "elapsed_time": "22:36:44", "remaining_time": "1 day, 2:20:40"} +{"current_steps": 13060, "total_steps": 28254, "loss": 0.7324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7970109373921878e-05, "epoch": 0.92, "percentage": 46.22, "elapsed_time": "22:37:48", "remaining_time": "1 day, 2:19:39"} +{"current_steps": 13070, "total_steps": 28254, "loss": 0.7488, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7942506647314547e-05, "epoch": 0.93, "percentage": 46.26, "elapsed_time": "22:38:49", "remaining_time": "1 day, 2:18:36"} +{"current_steps": 13080, "total_steps": 28254, "loss": 0.717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7914900282752648e-05, "epoch": 0.93, "percentage": 46.29, "elapsed_time": "22:39:53", "remaining_time": "1 day, 2:17:35"} +{"current_steps": 13090, "total_steps": 28254, "loss": 0.7391, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.788729031436718e-05, "epoch": 0.93, "percentage": 46.33, "elapsed_time": "22:40:55", "remaining_time": "1 day, 2:16:32"} +{"current_steps": 13100, "total_steps": 28254, "loss": 0.735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.78596767762936e-05, "epoch": 0.93, "percentage": 46.37, "elapsed_time": "22:41:56", "remaining_time": "1 day, 2:15:29"} +{"current_steps": 13110, "total_steps": 28254, "loss": 0.7312, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7832059702671776e-05, "epoch": 0.93, "percentage": 46.4, "elapsed_time": "22:42:58", "remaining_time": "1 day, 2:14:26"} +{"current_steps": 13120, "total_steps": 28254, "loss": 0.7198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7804439127645955e-05, "epoch": 0.93, "percentage": 46.44, "elapsed_time": "22:44:02", "remaining_time": "1 day, 2:13:25"} +{"current_steps": 13130, "total_steps": 28254, "loss": 0.7061, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7776815085364705e-05, "epoch": 0.93, "percentage": 46.47, "elapsed_time": "22:45:05", "remaining_time": "1 day, 2:12:24"} +{"current_steps": 13140, "total_steps": 28254, "loss": 0.7045, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7749187609980887e-05, "epoch": 0.93, "percentage": 46.51, "elapsed_time": "22:46:09", "remaining_time": "1 day, 2:11:23"} +{"current_steps": 13150, "total_steps": 28254, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.77215567356516e-05, "epoch": 0.93, "percentage": 46.54, "elapsed_time": "22:47:12", "remaining_time": "1 day, 2:10:22"} +{"current_steps": 13160, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7693922496538143e-05, "epoch": 0.93, "percentage": 46.58, "elapsed_time": "22:48:16", "remaining_time": "1 day, 2:09:21"} +{"current_steps": 13170, "total_steps": 28254, "loss": 0.7349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.766628492680599e-05, "epoch": 0.93, "percentage": 46.61, "elapsed_time": "22:49:19", "remaining_time": "1 day, 2:08:19"} +{"current_steps": 13180, "total_steps": 28254, "loss": 0.7177, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7638644060624723e-05, "epoch": 0.93, "percentage": 46.65, "elapsed_time": "22:50:17", "remaining_time": "1 day, 2:07:12"} +{"current_steps": 13190, "total_steps": 28254, "loss": 0.722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7610999932167993e-05, "epoch": 0.93, "percentage": 46.68, "elapsed_time": "22:51:19", "remaining_time": "1 day, 2:06:09"} +{"current_steps": 13200, "total_steps": 28254, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7583352575613497e-05, "epoch": 0.93, "percentage": 46.72, "elapsed_time": "22:52:22", "remaining_time": "1 day, 2:05:07"} +{"current_steps": 13210, "total_steps": 28254, "loss": 0.7362, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7555702025142916e-05, "epoch": 0.94, "percentage": 46.75, "elapsed_time": "22:53:25", "remaining_time": "1 day, 2:04:06"} +{"current_steps": 13220, "total_steps": 28254, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7528048314941872e-05, "epoch": 0.94, "percentage": 46.79, "elapsed_time": "22:54:27", "remaining_time": "1 day, 2:03:03"} +{"current_steps": 13230, "total_steps": 28254, "loss": 0.7187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.750039147919993e-05, "epoch": 0.94, "percentage": 46.83, "elapsed_time": "22:55:29", "remaining_time": "1 day, 2:02:00"} +{"current_steps": 13240, "total_steps": 28254, "loss": 0.7194, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7472731552110448e-05, "epoch": 0.94, "percentage": 46.86, "elapsed_time": "22:56:31", "remaining_time": "1 day, 2:00:57"} +{"current_steps": 13250, "total_steps": 28254, "loss": 0.7414, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.744506856787069e-05, "epoch": 0.94, "percentage": 46.9, "elapsed_time": "22:57:34", "remaining_time": "1 day, 1:59:56"} +{"current_steps": 13260, "total_steps": 28254, "loss": 0.7284, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7417402560681636e-05, "epoch": 0.94, "percentage": 46.93, "elapsed_time": "22:58:35", "remaining_time": "1 day, 1:58:51"} +{"current_steps": 13270, "total_steps": 28254, "loss": 0.7415, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7389733564748043e-05, "epoch": 0.94, "percentage": 46.97, "elapsed_time": "22:59:36", "remaining_time": "1 day, 1:57:47"} +{"current_steps": 13280, "total_steps": 28254, "loss": 0.7371, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7362061614278333e-05, "epoch": 0.94, "percentage": 47.0, "elapsed_time": "23:00:38", "remaining_time": "1 day, 1:56:44"} +{"current_steps": 13290, "total_steps": 28254, "loss": 0.7564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7334386743484608e-05, "epoch": 0.94, "percentage": 47.04, "elapsed_time": "23:01:40", "remaining_time": "1 day, 1:55:42"} +{"current_steps": 13300, "total_steps": 28254, "loss": 0.7017, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7306708986582553e-05, "epoch": 0.94, "percentage": 47.07, "elapsed_time": "23:02:42", "remaining_time": "1 day, 1:54:39"} +{"current_steps": 13310, "total_steps": 28254, "loss": 0.7452, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7279028377791444e-05, "epoch": 0.94, "percentage": 47.11, "elapsed_time": "23:03:43", "remaining_time": "1 day, 1:53:36"} +{"current_steps": 13320, "total_steps": 28254, "loss": 0.74, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.725134495133407e-05, "epoch": 0.94, "percentage": 47.14, "elapsed_time": "23:04:46", "remaining_time": "1 day, 1:52:34"} +{"current_steps": 13330, "total_steps": 28254, "loss": 0.741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7223658741436714e-05, "epoch": 0.94, "percentage": 47.18, "elapsed_time": "23:05:47", "remaining_time": "1 day, 1:51:30"} +{"current_steps": 13340, "total_steps": 28254, "loss": 0.7338, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.719596978232909e-05, "epoch": 0.94, "percentage": 47.21, "elapsed_time": "23:06:49", "remaining_time": "1 day, 1:50:27"} +{"current_steps": 13350, "total_steps": 28254, "loss": 0.7036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7168278108244318e-05, "epoch": 0.94, "percentage": 47.25, "elapsed_time": "23:07:51", "remaining_time": "1 day, 1:49:25"} +{"current_steps": 13360, "total_steps": 28254, "loss": 0.709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.714058375341887e-05, "epoch": 0.95, "percentage": 47.29, "elapsed_time": "23:08:53", "remaining_time": "1 day, 1:48:21"} +{"current_steps": 13370, "total_steps": 28254, "loss": 0.7165, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7112886752092535e-05, "epoch": 0.95, "percentage": 47.32, "elapsed_time": "23:09:55", "remaining_time": "1 day, 1:47:19"} +{"current_steps": 13380, "total_steps": 28254, "loss": 0.6954, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7085187138508373e-05, "epoch": 0.95, "percentage": 47.36, "elapsed_time": "23:10:56", "remaining_time": "1 day, 1:46:15"} +{"current_steps": 13390, "total_steps": 28254, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7057484946912676e-05, "epoch": 0.95, "percentage": 47.39, "elapsed_time": "23:11:57", "remaining_time": "1 day, 1:45:11"} +{"current_steps": 13400, "total_steps": 28254, "loss": 0.7261, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7029780211554917e-05, "epoch": 0.95, "percentage": 47.43, "elapsed_time": "23:12:58", "remaining_time": "1 day, 1:44:07"} +{"current_steps": 13410, "total_steps": 28254, "loss": 0.7591, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.700207296668772e-05, "epoch": 0.95, "percentage": 47.46, "elapsed_time": "23:14:02", "remaining_time": "1 day, 1:43:06"} +{"current_steps": 13420, "total_steps": 28254, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6974363246566814e-05, "epoch": 0.95, "percentage": 47.5, "elapsed_time": "23:15:03", "remaining_time": "1 day, 1:42:03"} +{"current_steps": 13430, "total_steps": 28254, "loss": 0.7162, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.694665108545098e-05, "epoch": 0.95, "percentage": 47.53, "elapsed_time": "23:16:05", "remaining_time": "1 day, 1:40:59"} +{"current_steps": 13440, "total_steps": 28254, "loss": 0.7088, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6918936517602023e-05, "epoch": 0.95, "percentage": 47.57, "elapsed_time": "23:17:05", "remaining_time": "1 day, 1:39:55"} +{"current_steps": 13450, "total_steps": 28254, "loss": 0.7684, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.689121957728471e-05, "epoch": 0.95, "percentage": 47.6, "elapsed_time": "23:18:10", "remaining_time": "1 day, 1:38:55"} +{"current_steps": 13460, "total_steps": 28254, "loss": 0.7023, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.686350029876678e-05, "epoch": 0.95, "percentage": 47.64, "elapsed_time": "23:19:11", "remaining_time": "1 day, 1:37:51"} +{"current_steps": 13470, "total_steps": 28254, "loss": 0.7079, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6835778716318804e-05, "epoch": 0.95, "percentage": 47.67, "elapsed_time": "23:20:14", "remaining_time": "1 day, 1:36:49"} +{"current_steps": 13480, "total_steps": 28254, "loss": 0.7105, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.680805486421426e-05, "epoch": 0.95, "percentage": 47.71, "elapsed_time": "23:21:16", "remaining_time": "1 day, 1:35:47"} +{"current_steps": 13490, "total_steps": 28254, "loss": 0.7583, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.678032877672938e-05, "epoch": 0.95, "percentage": 47.75, "elapsed_time": "23:22:20", "remaining_time": "1 day, 1:34:46"} +{"current_steps": 13500, "total_steps": 28254, "loss": 0.7468, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6752600488143216e-05, "epoch": 0.96, "percentage": 47.78, "elapsed_time": "23:23:24", "remaining_time": "1 day, 1:33:45"} +{"current_steps": 13510, "total_steps": 28254, "loss": 0.7491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6724870032737475e-05, "epoch": 0.96, "percentage": 47.82, "elapsed_time": "23:24:26", "remaining_time": "1 day, 1:32:43"} +{"current_steps": 13520, "total_steps": 28254, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6697137444796604e-05, "epoch": 0.96, "percentage": 47.85, "elapsed_time": "23:25:27", "remaining_time": "1 day, 1:31:39"} +{"current_steps": 13530, "total_steps": 28254, "loss": 0.7139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.666940275860765e-05, "epoch": 0.96, "percentage": 47.89, "elapsed_time": "23:26:29", "remaining_time": "1 day, 1:30:36"} +{"current_steps": 13540, "total_steps": 28254, "loss": 0.7253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6641666008460263e-05, "epoch": 0.96, "percentage": 47.92, "elapsed_time": "23:27:33", "remaining_time": "1 day, 1:29:35"} +{"current_steps": 13550, "total_steps": 28254, "loss": 0.7396, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.661392722864665e-05, "epoch": 0.96, "percentage": 47.96, "elapsed_time": "23:28:35", "remaining_time": "1 day, 1:28:33"} +{"current_steps": 13560, "total_steps": 28254, "loss": 0.7135, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6586186453461533e-05, "epoch": 0.96, "percentage": 47.99, "elapsed_time": "23:29:37", "remaining_time": "1 day, 1:27:30"} +{"current_steps": 13570, "total_steps": 28254, "loss": 0.7286, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6558443717202076e-05, "epoch": 0.96, "percentage": 48.03, "elapsed_time": "23:30:38", "remaining_time": "1 day, 1:26:27"} +{"current_steps": 13580, "total_steps": 28254, "loss": 0.7327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6530699054167896e-05, "epoch": 0.96, "percentage": 48.06, "elapsed_time": "23:31:41", "remaining_time": "1 day, 1:25:24"} +{"current_steps": 13590, "total_steps": 28254, "loss": 0.7073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.650295249866097e-05, "epoch": 0.96, "percentage": 48.1, "elapsed_time": "23:32:39", "remaining_time": "1 day, 1:24:18"} +{"current_steps": 13600, "total_steps": 28254, "loss": 0.7145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.647520408498563e-05, "epoch": 0.96, "percentage": 48.13, "elapsed_time": "23:33:42", "remaining_time": "1 day, 1:23:16"} +{"current_steps": 13610, "total_steps": 28254, "loss": 0.7094, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.64474538474485e-05, "epoch": 0.96, "percentage": 48.17, "elapsed_time": "23:34:44", "remaining_time": "1 day, 1:22:13"} +{"current_steps": 13620, "total_steps": 28254, "loss": 0.7216, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6419701820358457e-05, "epoch": 0.96, "percentage": 48.21, "elapsed_time": "23:35:46", "remaining_time": "1 day, 1:21:10"} +{"current_steps": 13630, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6391948038026587e-05, "epoch": 0.96, "percentage": 48.24, "elapsed_time": "23:36:48", "remaining_time": "1 day, 1:20:08"} +{"current_steps": 13640, "total_steps": 28254, "loss": 0.7416, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6364192534766163e-05, "epoch": 0.97, "percentage": 48.28, "elapsed_time": "23:37:51", "remaining_time": "1 day, 1:19:05"} +{"current_steps": 13650, "total_steps": 28254, "loss": 0.7127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.633643534489256e-05, "epoch": 0.97, "percentage": 48.31, "elapsed_time": "23:38:54", "remaining_time": "1 day, 1:18:04"} +{"current_steps": 13660, "total_steps": 28254, "loss": 0.7175, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.630867650272327e-05, "epoch": 0.97, "percentage": 48.35, "elapsed_time": "23:39:57", "remaining_time": "1 day, 1:17:02"} +{"current_steps": 13670, "total_steps": 28254, "loss": 0.7149, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.628091604257779e-05, "epoch": 0.97, "percentage": 48.38, "elapsed_time": "23:40:59", "remaining_time": "1 day, 1:16:00"} +{"current_steps": 13680, "total_steps": 28254, "loss": 0.7207, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6253153998777646e-05, "epoch": 0.97, "percentage": 48.42, "elapsed_time": "23:42:02", "remaining_time": "1 day, 1:14:57"} +{"current_steps": 13690, "total_steps": 28254, "loss": 0.7319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.622539040564633e-05, "epoch": 0.97, "percentage": 48.45, "elapsed_time": "23:43:03", "remaining_time": "1 day, 1:13:54"} +{"current_steps": 13700, "total_steps": 28254, "loss": 0.7423, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.61976252975092e-05, "epoch": 0.97, "percentage": 48.49, "elapsed_time": "23:44:04", "remaining_time": "1 day, 1:12:51"} +{"current_steps": 13710, "total_steps": 28254, "loss": 0.7501, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6169858708693544e-05, "epoch": 0.97, "percentage": 48.52, "elapsed_time": "23:45:11", "remaining_time": "1 day, 1:11:52"} +{"current_steps": 13720, "total_steps": 28254, "loss": 0.7502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.614209067352844e-05, "epoch": 0.97, "percentage": 48.56, "elapsed_time": "23:46:10", "remaining_time": "1 day, 1:10:47"} +{"current_steps": 13730, "total_steps": 28254, "loss": 0.7136, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6114321226344797e-05, "epoch": 0.97, "percentage": 48.59, "elapsed_time": "23:47:12", "remaining_time": "1 day, 1:09:44"} +{"current_steps": 13740, "total_steps": 28254, "loss": 0.7071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.608655040147521e-05, "epoch": 0.97, "percentage": 48.63, "elapsed_time": "23:48:15", "remaining_time": "1 day, 1:08:42"} +{"current_steps": 13750, "total_steps": 28254, "loss": 0.7285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6058778233254044e-05, "epoch": 0.97, "percentage": 48.67, "elapsed_time": "23:49:17", "remaining_time": "1 day, 1:07:40"} +{"current_steps": 13760, "total_steps": 28254, "loss": 0.7562, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6031004756017258e-05, "epoch": 0.97, "percentage": 48.7, "elapsed_time": "23:50:20", "remaining_time": "1 day, 1:06:38"} +{"current_steps": 13770, "total_steps": 28254, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.600323000410249e-05, "epoch": 0.97, "percentage": 48.74, "elapsed_time": "23:51:20", "remaining_time": "1 day, 1:05:33"} +{"current_steps": 13780, "total_steps": 28254, "loss": 0.72, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.597545401184891e-05, "epoch": 0.98, "percentage": 48.77, "elapsed_time": "23:52:22", "remaining_time": "1 day, 1:04:31"} +{"current_steps": 13790, "total_steps": 28254, "loss": 0.7321, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5947676813597253e-05, "epoch": 0.98, "percentage": 48.81, "elapsed_time": "23:53:24", "remaining_time": "1 day, 1:03:27"} +{"current_steps": 13800, "total_steps": 28254, "loss": 0.7412, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5919898443689712e-05, "epoch": 0.98, "percentage": 48.84, "elapsed_time": "23:54:28", "remaining_time": "1 day, 1:02:27"} +{"current_steps": 13810, "total_steps": 28254, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5892118936469965e-05, "epoch": 0.98, "percentage": 48.88, "elapsed_time": "23:55:31", "remaining_time": "1 day, 1:01:25"} +{"current_steps": 13820, "total_steps": 28254, "loss": 0.7262, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5864338326283068e-05, "epoch": 0.98, "percentage": 48.91, "elapsed_time": "23:56:35", "remaining_time": "1 day, 1:00:25"} +{"current_steps": 13830, "total_steps": 28254, "loss": 0.7041, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5836556647475453e-05, "epoch": 0.98, "percentage": 48.95, "elapsed_time": "23:57:39", "remaining_time": "1 day, 0:59:23"} +{"current_steps": 13840, "total_steps": 28254, "loss": 0.7359, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.580877393439487e-05, "epoch": 0.98, "percentage": 48.98, "elapsed_time": "23:58:41", "remaining_time": "1 day, 0:58:21"} +{"current_steps": 13850, "total_steps": 28254, "loss": 0.7501, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5780990221390355e-05, "epoch": 0.98, "percentage": 49.02, "elapsed_time": "23:59:42", "remaining_time": "1 day, 0:57:18"} +{"current_steps": 13860, "total_steps": 28254, "loss": 0.7227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5753205542812163e-05, "epoch": 0.98, "percentage": 49.06, "elapsed_time": "1 day, 0:00:47", "remaining_time": "1 day, 0:56:18"} +{"current_steps": 13870, "total_steps": 28254, "loss": 0.7348, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5725419933011763e-05, "epoch": 0.98, "percentage": 49.09, "elapsed_time": "1 day, 0:01:48", "remaining_time": "1 day, 0:55:14"} +{"current_steps": 13880, "total_steps": 28254, "loss": 0.7136, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5697633426341762e-05, "epoch": 0.98, "percentage": 49.13, "elapsed_time": "1 day, 0:02:50", "remaining_time": "1 day, 0:54:11"} +{"current_steps": 13890, "total_steps": 28254, "loss": 0.7142, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5669846057155878e-05, "epoch": 0.98, "percentage": 49.16, "elapsed_time": "1 day, 0:03:52", "remaining_time": "1 day, 0:53:08"} +{"current_steps": 13900, "total_steps": 28254, "loss": 0.7427, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.56420578598089e-05, "epoch": 0.98, "percentage": 49.2, "elapsed_time": "1 day, 0:04:55", "remaining_time": "1 day, 0:52:07"} +{"current_steps": 13910, "total_steps": 28254, "loss": 0.7268, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5614268868656633e-05, "epoch": 0.98, "percentage": 49.23, "elapsed_time": "1 day, 0:05:56", "remaining_time": "1 day, 0:51:03"} +{"current_steps": 13920, "total_steps": 28254, "loss": 0.7031, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5586479118055877e-05, "epoch": 0.99, "percentage": 49.27, "elapsed_time": "1 day, 0:06:59", "remaining_time": "1 day, 0:50:02"} +{"current_steps": 13930, "total_steps": 28254, "loss": 0.7564, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5558688642364353e-05, "epoch": 0.99, "percentage": 49.3, "elapsed_time": "1 day, 0:08:02", "remaining_time": "1 day, 0:48:59"} +{"current_steps": 13940, "total_steps": 28254, "loss": 0.7245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5530897475940706e-05, "epoch": 0.99, "percentage": 49.34, "elapsed_time": "1 day, 0:09:04", "remaining_time": "1 day, 0:47:56"} +{"current_steps": 13950, "total_steps": 28254, "loss": 0.7307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5503105653144392e-05, "epoch": 0.99, "percentage": 49.37, "elapsed_time": "1 day, 0:10:09", "remaining_time": "1 day, 0:46:57"} +{"current_steps": 13960, "total_steps": 28254, "loss": 0.7294, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5475313208335728e-05, "epoch": 0.99, "percentage": 49.41, "elapsed_time": "1 day, 0:11:13", "remaining_time": "1 day, 0:45:56"} +{"current_steps": 13970, "total_steps": 28254, "loss": 0.7223, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.544752017587575e-05, "epoch": 0.99, "percentage": 49.44, "elapsed_time": "1 day, 0:12:15", "remaining_time": "1 day, 0:44:54"} +{"current_steps": 13980, "total_steps": 28254, "loss": 0.7094, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.541972659012627e-05, "epoch": 0.99, "percentage": 49.48, "elapsed_time": "1 day, 0:13:17", "remaining_time": "1 day, 0:43:51"} +{"current_steps": 13990, "total_steps": 28254, "loss": 0.7137, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5391932485449738e-05, "epoch": 0.99, "percentage": 49.52, "elapsed_time": "1 day, 0:14:19", "remaining_time": "1 day, 0:42:48"} +{"current_steps": 14000, "total_steps": 28254, "loss": 0.7361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.536413789620929e-05, "epoch": 0.99, "percentage": 49.55, "elapsed_time": "1 day, 0:15:21", "remaining_time": "1 day, 0:41:45"} +{"current_steps": 14010, "total_steps": 28254, "loss": 0.6973, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.533634285676862e-05, "epoch": 0.99, "percentage": 49.59, "elapsed_time": "1 day, 0:16:26", "remaining_time": "1 day, 0:40:46"} +{"current_steps": 14020, "total_steps": 28254, "loss": 0.7166, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.530854740149201e-05, "epoch": 0.99, "percentage": 49.62, "elapsed_time": "1 day, 0:17:29", "remaining_time": "1 day, 0:39:44"} +{"current_steps": 14030, "total_steps": 28254, "loss": 0.7395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.528075156474423e-05, "epoch": 0.99, "percentage": 49.66, "elapsed_time": "1 day, 0:18:31", "remaining_time": "1 day, 0:38:41"} +{"current_steps": 14040, "total_steps": 28254, "loss": 0.7196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5252955380890554e-05, "epoch": 0.99, "percentage": 49.69, "elapsed_time": "1 day, 0:19:33", "remaining_time": "1 day, 0:37:39"} +{"current_steps": 14050, "total_steps": 28254, "loss": 0.6977, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.522515888429664e-05, "epoch": 0.99, "percentage": 49.73, "elapsed_time": "1 day, 0:20:36", "remaining_time": "1 day, 0:36:37"} +{"current_steps": 14060, "total_steps": 28254, "loss": 0.7156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5197362109328592e-05, "epoch": 1.0, "percentage": 49.76, "elapsed_time": "1 day, 0:21:40", "remaining_time": "1 day, 0:35:36"} +{"current_steps": 14070, "total_steps": 28254, "loss": 0.7036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5169565090352792e-05, "epoch": 1.0, "percentage": 49.8, "elapsed_time": "1 day, 0:22:46", "remaining_time": "1 day, 0:34:37"} +{"current_steps": 14080, "total_steps": 28254, "loss": 0.7311, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5141767861735976e-05, "epoch": 1.0, "percentage": 49.83, "elapsed_time": "1 day, 0:23:47", "remaining_time": "1 day, 0:33:34"} +{"current_steps": 14090, "total_steps": 28254, "loss": 0.7456, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.511397045784512e-05, "epoch": 1.0, "percentage": 49.87, "elapsed_time": "1 day, 0:24:50", "remaining_time": "1 day, 0:32:32"} +{"current_steps": 14100, "total_steps": 28254, "loss": 0.7164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5086172913047406e-05, "epoch": 1.0, "percentage": 49.9, "elapsed_time": "1 day, 0:25:51", "remaining_time": "1 day, 0:31:28"} +{"current_steps": 14110, "total_steps": 28254, "loss": 0.7436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.505837526171021e-05, "epoch": 1.0, "percentage": 49.94, "elapsed_time": "1 day, 0:26:55", "remaining_time": "1 day, 0:30:27"} +{"current_steps": 14120, "total_steps": 28254, "loss": 0.6857, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.503057753820103e-05, "epoch": 1.0, "percentage": 49.98, "elapsed_time": "1 day, 0:27:59", "remaining_time": "1 day, 0:29:27"} +{"current_steps": 14130, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.500277977688745e-05, "epoch": 1.0, "percentage": 50.01, "elapsed_time": "1 day, 0:29:01", "remaining_time": "1 day, 0:28:23"} +{"current_steps": 14140, "total_steps": 28254, "loss": 0.7336, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4974982012137106e-05, "epoch": 1.0, "percentage": 50.05, "elapsed_time": "1 day, 0:30:05", "remaining_time": "1 day, 0:27:23"} +{"current_steps": 14150, "total_steps": 28254, "loss": 0.6962, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.494718427831763e-05, "epoch": 1.0, "percentage": 50.08, "elapsed_time": "1 day, 0:31:05", "remaining_time": "1 day, 0:26:18"} +{"current_steps": 14160, "total_steps": 28254, "loss": 0.7205, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.491938660979664e-05, "epoch": 1.0, "percentage": 50.12, "elapsed_time": "1 day, 0:32:04", "remaining_time": "1 day, 0:25:12"} +{"current_steps": 14170, "total_steps": 28254, "loss": 0.7325, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4891589040941636e-05, "epoch": 1.0, "percentage": 50.15, "elapsed_time": "1 day, 0:33:08", "remaining_time": "1 day, 0:24:11"} +{"current_steps": 14180, "total_steps": 28254, "loss": 0.7169, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4863791606120022e-05, "epoch": 1.0, "percentage": 50.19, "elapsed_time": "1 day, 0:34:11", "remaining_time": "1 day, 0:23:09"} +{"current_steps": 14190, "total_steps": 28254, "loss": 0.7421, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.483599433969903e-05, "epoch": 1.0, "percentage": 50.22, "elapsed_time": "1 day, 0:35:13", "remaining_time": "1 day, 0:22:07"} +{"current_steps": 14200, "total_steps": 28254, "loss": 0.7531, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4808197276045692e-05, "epoch": 1.01, "percentage": 50.26, "elapsed_time": "1 day, 0:36:15", "remaining_time": "1 day, 0:21:04"} +{"current_steps": 14210, "total_steps": 28254, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4780400449526762e-05, "epoch": 1.01, "percentage": 50.29, "elapsed_time": "1 day, 0:37:18", "remaining_time": "1 day, 0:20:03"} +{"current_steps": 14220, "total_steps": 28254, "loss": 0.7389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4752603894508726e-05, "epoch": 1.01, "percentage": 50.33, "elapsed_time": "1 day, 0:38:22", "remaining_time": "1 day, 0:19:01"} +{"current_steps": 14230, "total_steps": 28254, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.472480764535773e-05, "epoch": 1.01, "percentage": 50.36, "elapsed_time": "1 day, 0:39:24", "remaining_time": "1 day, 0:17:59"} +{"current_steps": 14240, "total_steps": 28254, "loss": 0.7178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4697011736439546e-05, "epoch": 1.01, "percentage": 50.4, "elapsed_time": "1 day, 0:40:25", "remaining_time": "1 day, 0:16:55"} +{"current_steps": 14250, "total_steps": 28254, "loss": 0.7017, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.46692162021195e-05, "epoch": 1.01, "percentage": 50.44, "elapsed_time": "1 day, 0:41:29", "remaining_time": "1 day, 0:15:54"} +{"current_steps": 14260, "total_steps": 28254, "loss": 0.7451, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.464142107676248e-05, "epoch": 1.01, "percentage": 50.47, "elapsed_time": "1 day, 0:42:32", "remaining_time": "1 day, 0:14:53"} +{"current_steps": 14270, "total_steps": 28254, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.461362639473287e-05, "epoch": 1.01, "percentage": 50.51, "elapsed_time": "1 day, 0:43:36", "remaining_time": "1 day, 0:13:52"} +{"current_steps": 14280, "total_steps": 28254, "loss": 0.7445, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4585832190394496e-05, "epoch": 1.01, "percentage": 50.54, "elapsed_time": "1 day, 0:44:39", "remaining_time": "1 day, 0:12:50"} +{"current_steps": 14290, "total_steps": 28254, "loss": 0.6883, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4558038498110584e-05, "epoch": 1.01, "percentage": 50.58, "elapsed_time": "1 day, 0:45:42", "remaining_time": "1 day, 0:11:48"} +{"current_steps": 14300, "total_steps": 28254, "loss": 0.6903, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4530245352243738e-05, "epoch": 1.01, "percentage": 50.61, "elapsed_time": "1 day, 0:46:47", "remaining_time": "1 day, 0:10:49"} +{"current_steps": 14310, "total_steps": 28254, "loss": 0.714, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4502452787155897e-05, "epoch": 1.01, "percentage": 50.65, "elapsed_time": "1 day, 0:47:51", "remaining_time": "1 day, 0:09:47"} +{"current_steps": 14320, "total_steps": 28254, "loss": 0.7174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.447466083720827e-05, "epoch": 1.01, "percentage": 50.68, "elapsed_time": "1 day, 0:48:55", "remaining_time": "1 day, 0:08:47"} +{"current_steps": 14330, "total_steps": 28254, "loss": 0.7164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4446869536761296e-05, "epoch": 1.01, "percentage": 50.72, "elapsed_time": "1 day, 0:49:57", "remaining_time": "1 day, 0:07:44"} +{"current_steps": 14340, "total_steps": 28254, "loss": 0.746, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4419078920174633e-05, "epoch": 1.02, "percentage": 50.75, "elapsed_time": "1 day, 0:51:01", "remaining_time": "1 day, 0:06:43"} +{"current_steps": 14350, "total_steps": 28254, "loss": 0.7265, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4391289021807078e-05, "epoch": 1.02, "percentage": 50.79, "elapsed_time": "1 day, 0:52:03", "remaining_time": "1 day, 0:05:41"} +{"current_steps": 14360, "total_steps": 28254, "loss": 0.7462, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.436349987601655e-05, "epoch": 1.02, "percentage": 50.82, "elapsed_time": "1 day, 0:53:05", "remaining_time": "1 day, 0:04:38"} +{"current_steps": 14370, "total_steps": 28254, "loss": 0.7269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4335711517160013e-05, "epoch": 1.02, "percentage": 50.86, "elapsed_time": "1 day, 0:54:07", "remaining_time": "1 day, 0:03:35"} +{"current_steps": 14380, "total_steps": 28254, "loss": 0.7325, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4307923979593493e-05, "epoch": 1.02, "percentage": 50.9, "elapsed_time": "1 day, 0:55:12", "remaining_time": "1 day, 0:02:35"} +{"current_steps": 14390, "total_steps": 28254, "loss": 0.6914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4280137297671975e-05, "epoch": 1.02, "percentage": 50.93, "elapsed_time": "1 day, 0:56:15", "remaining_time": "1 day, 0:01:33"} +{"current_steps": 14400, "total_steps": 28254, "loss": 0.7243, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.425235150574941e-05, "epoch": 1.02, "percentage": 50.97, "elapsed_time": "1 day, 0:57:19", "remaining_time": "1 day, 0:00:33"} +{"current_steps": 14410, "total_steps": 28254, "loss": 0.7139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.422456663817863e-05, "epoch": 1.02, "percentage": 51.0, "elapsed_time": "1 day, 0:58:21", "remaining_time": "23:59:30"} +{"current_steps": 14420, "total_steps": 28254, "loss": 0.7298, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4196782729311315e-05, "epoch": 1.02, "percentage": 51.04, "elapsed_time": "1 day, 0:59:24", "remaining_time": "23:58:28"} +{"current_steps": 14430, "total_steps": 28254, "loss": 0.712, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4168999813497977e-05, "epoch": 1.02, "percentage": 51.07, "elapsed_time": "1 day, 1:00:29", "remaining_time": "23:57:28"} +{"current_steps": 14440, "total_steps": 28254, "loss": 0.7355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.414121792508791e-05, "epoch": 1.02, "percentage": 51.11, "elapsed_time": "1 day, 1:01:31", "remaining_time": "23:56:25"} +{"current_steps": 14450, "total_steps": 28254, "loss": 0.6978, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4113437098429118e-05, "epoch": 1.02, "percentage": 51.14, "elapsed_time": "1 day, 1:02:35", "remaining_time": "23:55:24"} +{"current_steps": 14460, "total_steps": 28254, "loss": 0.6907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.408565736786829e-05, "epoch": 1.02, "percentage": 51.18, "elapsed_time": "1 day, 1:03:35", "remaining_time": "23:54:20"} +{"current_steps": 14470, "total_steps": 28254, "loss": 0.7259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4057878767750767e-05, "epoch": 1.02, "percentage": 51.21, "elapsed_time": "1 day, 1:04:40", "remaining_time": "23:53:20"} +{"current_steps": 14480, "total_steps": 28254, "loss": 0.7158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4030101332420508e-05, "epoch": 1.02, "percentage": 51.25, "elapsed_time": "1 day, 1:05:42", "remaining_time": "23:52:18"} +{"current_steps": 14490, "total_steps": 28254, "loss": 0.7329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4002325096220013e-05, "epoch": 1.03, "percentage": 51.28, "elapsed_time": "1 day, 1:06:48", "remaining_time": "23:51:18"} +{"current_steps": 14500, "total_steps": 28254, "loss": 0.7507, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3974550093490295e-05, "epoch": 1.03, "percentage": 51.32, "elapsed_time": "1 day, 1:07:49", "remaining_time": "23:50:15"} +{"current_steps": 14510, "total_steps": 28254, "loss": 0.7169, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3946776358570853e-05, "epoch": 1.03, "percentage": 51.36, "elapsed_time": "1 day, 1:08:55", "remaining_time": "23:49:16"} +{"current_steps": 14520, "total_steps": 28254, "loss": 0.7391, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3919003925799623e-05, "epoch": 1.03, "percentage": 51.39, "elapsed_time": "1 day, 1:10:00", "remaining_time": "23:48:16"} +{"current_steps": 14530, "total_steps": 28254, "loss": 0.729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.389123282951293e-05, "epoch": 1.03, "percentage": 51.43, "elapsed_time": "1 day, 1:11:01", "remaining_time": "23:47:12"} +{"current_steps": 14540, "total_steps": 28254, "loss": 0.7366, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3863463104045422e-05, "epoch": 1.03, "percentage": 51.46, "elapsed_time": "1 day, 1:12:02", "remaining_time": "23:46:08"} +{"current_steps": 14550, "total_steps": 28254, "loss": 0.72, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.383569478373009e-05, "epoch": 1.03, "percentage": 51.5, "elapsed_time": "1 day, 1:13:05", "remaining_time": "23:45:07"} +{"current_steps": 14560, "total_steps": 28254, "loss": 0.7108, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.380792790289816e-05, "epoch": 1.03, "percentage": 51.53, "elapsed_time": "1 day, 1:14:08", "remaining_time": "23:44:04"} +{"current_steps": 14570, "total_steps": 28254, "loss": 0.7269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3780162495879094e-05, "epoch": 1.03, "percentage": 51.57, "elapsed_time": "1 day, 1:15:10", "remaining_time": "23:43:01"} +{"current_steps": 14580, "total_steps": 28254, "loss": 0.7303, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3752398597000508e-05, "epoch": 1.03, "percentage": 51.6, "elapsed_time": "1 day, 1:16:13", "remaining_time": "23:42:00"} +{"current_steps": 14590, "total_steps": 28254, "loss": 0.7183, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3724636240588194e-05, "epoch": 1.03, "percentage": 51.64, "elapsed_time": "1 day, 1:17:16", "remaining_time": "23:40:59"} +{"current_steps": 14600, "total_steps": 28254, "loss": 0.6879, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.369965146699447e-05, "epoch": 1.03, "percentage": 51.67, "elapsed_time": "1 day, 1:18:20", "remaining_time": "23:39:58"} +{"current_steps": 14610, "total_steps": 28254, "loss": 0.7162, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.367189213582869e-05, "epoch": 1.03, "percentage": 51.71, "elapsed_time": "1 day, 1:19:23", "remaining_time": "23:38:55"} +{"current_steps": 14620, "total_steps": 28254, "loss": 0.7065, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3644134446662946e-05, "epoch": 1.03, "percentage": 51.74, "elapsed_time": "1 day, 1:20:25", "remaining_time": "23:37:53"} +{"current_steps": 14630, "total_steps": 28254, "loss": 0.7215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.361637843381536e-05, "epoch": 1.04, "percentage": 51.78, "elapsed_time": "1 day, 1:21:29", "remaining_time": "23:36:51"} +{"current_steps": 14640, "total_steps": 28254, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.358862413160193e-05, "epoch": 1.04, "percentage": 51.82, "elapsed_time": "1 day, 1:22:32", "remaining_time": "23:35:50"} +{"current_steps": 14650, "total_steps": 28254, "loss": 0.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3560871574336586e-05, "epoch": 1.04, "percentage": 51.85, "elapsed_time": "1 day, 1:23:33", "remaining_time": "23:34:46"} +{"current_steps": 14660, "total_steps": 28254, "loss": 0.7008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.353312079633104e-05, "epoch": 1.04, "percentage": 51.89, "elapsed_time": "1 day, 1:24:36", "remaining_time": "23:33:44"} +{"current_steps": 14670, "total_steps": 28254, "loss": 0.7433, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3505371831894863e-05, "epoch": 1.04, "percentage": 51.92, "elapsed_time": "1 day, 1:25:40", "remaining_time": "23:32:43"} +{"current_steps": 14680, "total_steps": 28254, "loss": 0.7083, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3477624715335346e-05, "epoch": 1.04, "percentage": 51.96, "elapsed_time": "1 day, 1:26:41", "remaining_time": "23:31:40"} +{"current_steps": 14690, "total_steps": 28254, "loss": 0.7103, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3449879480957525e-05, "epoch": 1.04, "percentage": 51.99, "elapsed_time": "1 day, 1:27:46", "remaining_time": "23:30:39"} +{"current_steps": 14700, "total_steps": 28254, "loss": 0.7264, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3422136163064094e-05, "epoch": 1.04, "percentage": 52.03, "elapsed_time": "1 day, 1:28:47", "remaining_time": "23:29:36"} +{"current_steps": 14710, "total_steps": 28254, "loss": 0.7147, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3394394795955354e-05, "epoch": 1.04, "percentage": 52.06, "elapsed_time": "1 day, 1:29:47", "remaining_time": "23:28:32"} +{"current_steps": 14720, "total_steps": 28254, "loss": 0.7317, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3366655413929228e-05, "epoch": 1.04, "percentage": 52.1, "elapsed_time": "1 day, 1:30:52", "remaining_time": "23:27:31"} +{"current_steps": 14730, "total_steps": 28254, "loss": 0.7039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.333891805128118e-05, "epoch": 1.04, "percentage": 52.13, "elapsed_time": "1 day, 1:31:54", "remaining_time": "23:26:29"} +{"current_steps": 14740, "total_steps": 28254, "loss": 0.7199, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3311182742304173e-05, "epoch": 1.04, "percentage": 52.17, "elapsed_time": "1 day, 1:32:59", "remaining_time": "23:25:28"} +{"current_steps": 14750, "total_steps": 28254, "loss": 0.7012, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.328344952128861e-05, "epoch": 1.04, "percentage": 52.2, "elapsed_time": "1 day, 1:34:00", "remaining_time": "23:24:25"} +{"current_steps": 14760, "total_steps": 28254, "loss": 0.7678, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.325571842252235e-05, "epoch": 1.04, "percentage": 52.24, "elapsed_time": "1 day, 1:35:00", "remaining_time": "23:23:21"} +{"current_steps": 14770, "total_steps": 28254, "loss": 0.7147, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.32279894802906e-05, "epoch": 1.05, "percentage": 52.28, "elapsed_time": "1 day, 1:36:04", "remaining_time": "23:22:19"} +{"current_steps": 14780, "total_steps": 28254, "loss": 0.7143, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3200262728875925e-05, "epoch": 1.05, "percentage": 52.31, "elapsed_time": "1 day, 1:37:06", "remaining_time": "23:21:16"} +{"current_steps": 14790, "total_steps": 28254, "loss": 0.6973, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3172538202558137e-05, "epoch": 1.05, "percentage": 52.35, "elapsed_time": "1 day, 1:38:08", "remaining_time": "23:20:14"} +{"current_steps": 14800, "total_steps": 28254, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3144815935614352e-05, "epoch": 1.05, "percentage": 52.38, "elapsed_time": "1 day, 1:39:12", "remaining_time": "23:19:13"} +{"current_steps": 14810, "total_steps": 28254, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3117095962318864e-05, "epoch": 1.05, "percentage": 52.42, "elapsed_time": "1 day, 1:40:12", "remaining_time": "23:18:08"} +{"current_steps": 14820, "total_steps": 28254, "loss": 0.7638, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.308937831694313e-05, "epoch": 1.05, "percentage": 52.45, "elapsed_time": "1 day, 1:41:15", "remaining_time": "23:17:06"} +{"current_steps": 14830, "total_steps": 28254, "loss": 0.7369, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3061663033755725e-05, "epoch": 1.05, "percentage": 52.49, "elapsed_time": "1 day, 1:42:17", "remaining_time": "23:16:04"} +{"current_steps": 14840, "total_steps": 28254, "loss": 0.7297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3033950147022328e-05, "epoch": 1.05, "percentage": 52.52, "elapsed_time": "1 day, 1:43:19", "remaining_time": "23:15:01"} +{"current_steps": 14850, "total_steps": 28254, "loss": 0.7307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3006239691005626e-05, "epoch": 1.05, "percentage": 52.56, "elapsed_time": "1 day, 1:44:23", "remaining_time": "23:14:00"} +{"current_steps": 14860, "total_steps": 28254, "loss": 0.7289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.297853169996534e-05, "epoch": 1.05, "percentage": 52.59, "elapsed_time": "1 day, 1:45:25", "remaining_time": "23:12:57"} +{"current_steps": 14870, "total_steps": 28254, "loss": 0.7271, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2950826208158077e-05, "epoch": 1.05, "percentage": 52.63, "elapsed_time": "1 day, 1:46:27", "remaining_time": "23:11:55"} +{"current_steps": 14880, "total_steps": 28254, "loss": 0.7116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2923123249837423e-05, "epoch": 1.05, "percentage": 52.67, "elapsed_time": "1 day, 1:47:30", "remaining_time": "23:10:53"} +{"current_steps": 14890, "total_steps": 28254, "loss": 0.7267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2895422859253787e-05, "epoch": 1.05, "percentage": 52.7, "elapsed_time": "1 day, 1:48:33", "remaining_time": "23:09:51"} +{"current_steps": 14900, "total_steps": 28254, "loss": 0.7217, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2867725070654443e-05, "epoch": 1.05, "percentage": 52.74, "elapsed_time": "1 day, 1:49:37", "remaining_time": "23:08:50"} +{"current_steps": 14910, "total_steps": 28254, "loss": 0.7272, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2840029918283398e-05, "epoch": 1.06, "percentage": 52.77, "elapsed_time": "1 day, 1:50:38", "remaining_time": "23:07:46"} +{"current_steps": 14920, "total_steps": 28254, "loss": 0.7261, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2812337436381443e-05, "epoch": 1.06, "percentage": 52.81, "elapsed_time": "1 day, 1:51:41", "remaining_time": "23:06:44"} +{"current_steps": 14930, "total_steps": 28254, "loss": 0.7273, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2784647659186038e-05, "epoch": 1.06, "percentage": 52.84, "elapsed_time": "1 day, 1:52:45", "remaining_time": "23:05:43"} +{"current_steps": 14940, "total_steps": 28254, "loss": 0.7185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2756960620931332e-05, "epoch": 1.06, "percentage": 52.88, "elapsed_time": "1 day, 1:53:46", "remaining_time": "23:04:40"} +{"current_steps": 14950, "total_steps": 28254, "loss": 0.7266, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.272927635584805e-05, "epoch": 1.06, "percentage": 52.91, "elapsed_time": "1 day, 1:54:48", "remaining_time": "23:03:37"} +{"current_steps": 14960, "total_steps": 28254, "loss": 0.7296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2701594898163505e-05, "epoch": 1.06, "percentage": 52.95, "elapsed_time": "1 day, 1:55:54", "remaining_time": "23:02:37"} +{"current_steps": 14970, "total_steps": 28254, "loss": 0.7148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2673916282101545e-05, "epoch": 1.06, "percentage": 52.98, "elapsed_time": "1 day, 1:56:55", "remaining_time": "23:01:34"} +{"current_steps": 14980, "total_steps": 28254, "loss": 0.7427, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2646240541882507e-05, "epoch": 1.06, "percentage": 53.02, "elapsed_time": "1 day, 1:57:58", "remaining_time": "23:00:32"} +{"current_steps": 14990, "total_steps": 28254, "loss": 0.7107, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2618567711723165e-05, "epoch": 1.06, "percentage": 53.05, "elapsed_time": "1 day, 1:59:01", "remaining_time": "22:59:30"} +{"current_steps": 15000, "total_steps": 28254, "loss": 0.7066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2590897825836675e-05, "epoch": 1.06, "percentage": 53.09, "elapsed_time": "1 day, 2:00:04", "remaining_time": "22:58:29"} +{"current_steps": 15010, "total_steps": 28254, "loss": 0.6984, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2563230918432597e-05, "epoch": 1.06, "percentage": 53.13, "elapsed_time": "1 day, 2:01:07", "remaining_time": "22:57:26"} +{"current_steps": 15020, "total_steps": 28254, "loss": 0.7009, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.253556702371677e-05, "epoch": 1.06, "percentage": 53.16, "elapsed_time": "1 day, 2:02:09", "remaining_time": "22:56:24"} +{"current_steps": 15030, "total_steps": 28254, "loss": 0.7006, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.250790617589134e-05, "epoch": 1.06, "percentage": 53.2, "elapsed_time": "1 day, 2:03:13", "remaining_time": "22:55:23"} +{"current_steps": 15040, "total_steps": 28254, "loss": 0.7112, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2480248409154644e-05, "epoch": 1.06, "percentage": 53.23, "elapsed_time": "1 day, 2:04:15", "remaining_time": "22:54:20"} +{"current_steps": 15050, "total_steps": 28254, "loss": 0.7061, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2452593757701254e-05, "epoch": 1.07, "percentage": 53.27, "elapsed_time": "1 day, 2:05:19", "remaining_time": "22:53:19"} +{"current_steps": 15060, "total_steps": 28254, "loss": 0.6887, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2424942255721863e-05, "epoch": 1.07, "percentage": 53.3, "elapsed_time": "1 day, 2:06:22", "remaining_time": "22:52:17"} +{"current_steps": 15070, "total_steps": 28254, "loss": 0.735, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.239729393740329e-05, "epoch": 1.07, "percentage": 53.34, "elapsed_time": "1 day, 2:07:27", "remaining_time": "22:51:17"} +{"current_steps": 15080, "total_steps": 28254, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2369648836928388e-05, "epoch": 1.07, "percentage": 53.37, "elapsed_time": "1 day, 2:08:30", "remaining_time": "22:50:15"} +{"current_steps": 15090, "total_steps": 28254, "loss": 0.6979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2342006988476062e-05, "epoch": 1.07, "percentage": 53.41, "elapsed_time": "1 day, 2:09:30", "remaining_time": "22:49:11"} +{"current_steps": 15100, "total_steps": 28254, "loss": 0.7178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.231436842622118e-05, "epoch": 1.07, "percentage": 53.44, "elapsed_time": "1 day, 2:10:31", "remaining_time": "22:48:07"} +{"current_steps": 15110, "total_steps": 28254, "loss": 0.7372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2286733184334564e-05, "epoch": 1.07, "percentage": 53.48, "elapsed_time": "1 day, 2:11:33", "remaining_time": "22:47:04"} +{"current_steps": 15120, "total_steps": 28254, "loss": 0.7373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.225910129698289e-05, "epoch": 1.07, "percentage": 53.51, "elapsed_time": "1 day, 2:12:35", "remaining_time": "22:46:01"} +{"current_steps": 15130, "total_steps": 28254, "loss": 0.6994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.223147279832874e-05, "epoch": 1.07, "percentage": 53.55, "elapsed_time": "1 day, 2:13:42", "remaining_time": "22:45:03"} +{"current_steps": 15140, "total_steps": 28254, "loss": 0.7149, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2203847722530476e-05, "epoch": 1.07, "percentage": 53.59, "elapsed_time": "1 day, 2:14:45", "remaining_time": "22:44:01"} +{"current_steps": 15150, "total_steps": 28254, "loss": 0.7195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.217622610374223e-05, "epoch": 1.07, "percentage": 53.62, "elapsed_time": "1 day, 2:15:48", "remaining_time": "22:42:59"} +{"current_steps": 15160, "total_steps": 28254, "loss": 0.7259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2148607976113866e-05, "epoch": 1.07, "percentage": 53.66, "elapsed_time": "1 day, 2:16:52", "remaining_time": "22:41:58"} +{"current_steps": 15170, "total_steps": 28254, "loss": 0.7363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2120993373790928e-05, "epoch": 1.07, "percentage": 53.69, "elapsed_time": "1 day, 2:17:56", "remaining_time": "22:40:57"} +{"current_steps": 15180, "total_steps": 28254, "loss": 0.7158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.20933823309146e-05, "epoch": 1.07, "percentage": 53.73, "elapsed_time": "1 day, 2:18:58", "remaining_time": "22:39:54"} +{"current_steps": 15190, "total_steps": 28254, "loss": 0.713, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2065774881621673e-05, "epoch": 1.08, "percentage": 53.76, "elapsed_time": "1 day, 2:20:01", "remaining_time": "22:38:52"} +{"current_steps": 15200, "total_steps": 28254, "loss": 0.7228, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2038171060044488e-05, "epoch": 1.08, "percentage": 53.8, "elapsed_time": "1 day, 2:21:03", "remaining_time": "22:37:50"} +{"current_steps": 15210, "total_steps": 28254, "loss": 0.7034, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.20105709003109e-05, "epoch": 1.08, "percentage": 53.83, "elapsed_time": "1 day, 2:22:06", "remaining_time": "22:36:48"} +{"current_steps": 15220, "total_steps": 28254, "loss": 0.732, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.198297443654424e-05, "epoch": 1.08, "percentage": 53.87, "elapsed_time": "1 day, 2:23:09", "remaining_time": "22:35:46"} +{"current_steps": 15230, "total_steps": 28254, "loss": 0.6914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1955381702863275e-05, "epoch": 1.08, "percentage": 53.9, "elapsed_time": "1 day, 2:24:12", "remaining_time": "22:34:44"} +{"current_steps": 15240, "total_steps": 28254, "loss": 0.7144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.192779273338215e-05, "epoch": 1.08, "percentage": 53.94, "elapsed_time": "1 day, 2:25:17", "remaining_time": "22:33:44"} +{"current_steps": 15250, "total_steps": 28254, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.190020756221036e-05, "epoch": 1.08, "percentage": 53.97, "elapsed_time": "1 day, 2:26:20", "remaining_time": "22:32:42"} +{"current_steps": 15260, "total_steps": 28254, "loss": 0.6972, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1872626223452708e-05, "epoch": 1.08, "percentage": 54.01, "elapsed_time": "1 day, 2:27:21", "remaining_time": "22:31:39"} +{"current_steps": 15270, "total_steps": 28254, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.184504875120925e-05, "epoch": 1.08, "percentage": 54.05, "elapsed_time": "1 day, 2:28:23", "remaining_time": "22:30:36"} +{"current_steps": 15280, "total_steps": 28254, "loss": 0.6649, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1817475179575285e-05, "epoch": 1.08, "percentage": 54.08, "elapsed_time": "1 day, 2:29:28", "remaining_time": "22:29:35"} +{"current_steps": 15290, "total_steps": 28254, "loss": 0.7261, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.178990554264124e-05, "epoch": 1.08, "percentage": 54.12, "elapsed_time": "1 day, 2:30:34", "remaining_time": "22:28:36"} +{"current_steps": 15300, "total_steps": 28254, "loss": 0.7163, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1762339874492732e-05, "epoch": 1.08, "percentage": 54.15, "elapsed_time": "1 day, 2:31:34", "remaining_time": "22:27:32"} +{"current_steps": 15310, "total_steps": 28254, "loss": 0.7242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1734778209210437e-05, "epoch": 1.08, "percentage": 54.19, "elapsed_time": "1 day, 2:32:36", "remaining_time": "22:26:29"} +{"current_steps": 15320, "total_steps": 28254, "loss": 0.6934, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1707220580870115e-05, "epoch": 1.08, "percentage": 54.22, "elapsed_time": "1 day, 2:33:40", "remaining_time": "22:25:28"} +{"current_steps": 15330, "total_steps": 28254, "loss": 0.7318, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1679667023542483e-05, "epoch": 1.09, "percentage": 54.26, "elapsed_time": "1 day, 2:34:44", "remaining_time": "22:24:27"} +{"current_steps": 15340, "total_steps": 28254, "loss": 0.7051, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1652117571293273e-05, "epoch": 1.09, "percentage": 54.29, "elapsed_time": "1 day, 2:35:47", "remaining_time": "22:23:25"} +{"current_steps": 15350, "total_steps": 28254, "loss": 0.7365, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1624572258183113e-05, "epoch": 1.09, "percentage": 54.33, "elapsed_time": "1 day, 2:36:52", "remaining_time": "22:22:25"} +{"current_steps": 15360, "total_steps": 28254, "loss": 0.6866, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1597031118267546e-05, "epoch": 1.09, "percentage": 54.36, "elapsed_time": "1 day, 2:37:53", "remaining_time": "22:21:20"} +{"current_steps": 15370, "total_steps": 28254, "loss": 0.72, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1569494185596904e-05, "epoch": 1.09, "percentage": 54.4, "elapsed_time": "1 day, 2:38:56", "remaining_time": "22:20:19"} +{"current_steps": 15380, "total_steps": 28254, "loss": 0.7119, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1541961494216364e-05, "epoch": 1.09, "percentage": 54.43, "elapsed_time": "1 day, 2:39:59", "remaining_time": "22:19:17"} +{"current_steps": 15390, "total_steps": 28254, "loss": 0.6931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.151443307816584e-05, "epoch": 1.09, "percentage": 54.47, "elapsed_time": "1 day, 2:41:03", "remaining_time": "22:18:16"} +{"current_steps": 15400, "total_steps": 28254, "loss": 0.6874, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1486908971479967e-05, "epoch": 1.09, "percentage": 54.51, "elapsed_time": "1 day, 2:42:04", "remaining_time": "22:17:12"} +{"current_steps": 15410, "total_steps": 28254, "loss": 0.7406, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1459389208188044e-05, "epoch": 1.09, "percentage": 54.54, "elapsed_time": "1 day, 2:43:07", "remaining_time": "22:16:10"} +{"current_steps": 15420, "total_steps": 28254, "loss": 0.6964, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1431873822314e-05, "epoch": 1.09, "percentage": 54.58, "elapsed_time": "1 day, 2:44:10", "remaining_time": "22:15:08"} +{"current_steps": 15430, "total_steps": 28254, "loss": 0.686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1404362847876356e-05, "epoch": 1.09, "percentage": 54.61, "elapsed_time": "1 day, 2:45:13", "remaining_time": "22:14:06"} +{"current_steps": 15440, "total_steps": 28254, "loss": 0.739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.137685631888819e-05, "epoch": 1.09, "percentage": 54.65, "elapsed_time": "1 day, 2:46:16", "remaining_time": "22:13:04"} +{"current_steps": 15450, "total_steps": 28254, "loss": 0.7261, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1349354269357063e-05, "epoch": 1.09, "percentage": 54.68, "elapsed_time": "1 day, 2:47:18", "remaining_time": "22:12:02"} +{"current_steps": 15460, "total_steps": 28254, "loss": 0.7069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1321856733285004e-05, "epoch": 1.09, "percentage": 54.72, "elapsed_time": "1 day, 2:48:20", "remaining_time": "22:10:59"} +{"current_steps": 15470, "total_steps": 28254, "loss": 0.7227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1294363744668476e-05, "epoch": 1.1, "percentage": 54.75, "elapsed_time": "1 day, 2:49:23", "remaining_time": "22:09:57"} +{"current_steps": 15480, "total_steps": 28254, "loss": 0.7378, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1266875337498306e-05, "epoch": 1.1, "percentage": 54.79, "elapsed_time": "1 day, 2:50:26", "remaining_time": "22:08:55"} +{"current_steps": 15490, "total_steps": 28254, "loss": 0.6962, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1239391545759653e-05, "epoch": 1.1, "percentage": 54.82, "elapsed_time": "1 day, 2:51:28", "remaining_time": "22:07:53"} +{"current_steps": 15500, "total_steps": 28254, "loss": 0.6828, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.121191240343198e-05, "epoch": 1.1, "percentage": 54.86, "elapsed_time": "1 day, 2:52:30", "remaining_time": "22:06:50"} +{"current_steps": 15510, "total_steps": 28254, "loss": 0.7323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1184437944489002e-05, "epoch": 1.1, "percentage": 54.89, "elapsed_time": "1 day, 2:53:33", "remaining_time": "22:05:48"} +{"current_steps": 15520, "total_steps": 28254, "loss": 0.7342, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1156968202898645e-05, "epoch": 1.1, "percentage": 54.93, "elapsed_time": "1 day, 2:54:36", "remaining_time": "22:04:46"} +{"current_steps": 15530, "total_steps": 28254, "loss": 0.7187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1129503212622983e-05, "epoch": 1.1, "percentage": 54.97, "elapsed_time": "1 day, 2:55:37", "remaining_time": "22:03:42"} +{"current_steps": 15540, "total_steps": 28254, "loss": 0.7252, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1102043007618235e-05, "epoch": 1.1, "percentage": 55.0, "elapsed_time": "1 day, 2:56:40", "remaining_time": "22:02:40"} +{"current_steps": 15550, "total_steps": 28254, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1074587621834707e-05, "epoch": 1.1, "percentage": 55.04, "elapsed_time": "1 day, 2:57:42", "remaining_time": "22:01:37"} +{"current_steps": 15560, "total_steps": 28254, "loss": 0.717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.104713708921673e-05, "epoch": 1.1, "percentage": 55.07, "elapsed_time": "1 day, 2:58:43", "remaining_time": "22:00:34"} +{"current_steps": 15570, "total_steps": 28254, "loss": 0.6944, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1019691443702665e-05, "epoch": 1.1, "percentage": 55.11, "elapsed_time": "1 day, 2:59:45", "remaining_time": "21:59:31"} +{"current_steps": 15580, "total_steps": 28254, "loss": 0.7005, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0992250719224775e-05, "epoch": 1.1, "percentage": 55.14, "elapsed_time": "1 day, 3:00:48", "remaining_time": "21:58:29"} +{"current_steps": 15590, "total_steps": 28254, "loss": 0.6812, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.09648149497093e-05, "epoch": 1.1, "percentage": 55.18, "elapsed_time": "1 day, 3:01:49", "remaining_time": "21:57:25"} +{"current_steps": 15600, "total_steps": 28254, "loss": 0.7119, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.093738416907631e-05, "epoch": 1.1, "percentage": 55.21, "elapsed_time": "1 day, 3:02:52", "remaining_time": "21:56:24"} +{"current_steps": 15610, "total_steps": 28254, "loss": 0.7323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0909958411239747e-05, "epoch": 1.1, "percentage": 55.25, "elapsed_time": "1 day, 3:03:54", "remaining_time": "21:55:21"} +{"current_steps": 15620, "total_steps": 28254, "loss": 0.7042, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.08825377101073e-05, "epoch": 1.11, "percentage": 55.28, "elapsed_time": "1 day, 3:04:55", "remaining_time": "21:54:17"} +{"current_steps": 15630, "total_steps": 28254, "loss": 0.7251, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.085512209958044e-05, "epoch": 1.11, "percentage": 55.32, "elapsed_time": "1 day, 3:05:56", "remaining_time": "21:53:14"} +{"current_steps": 15640, "total_steps": 28254, "loss": 0.7128, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0827711613554313e-05, "epoch": 1.11, "percentage": 55.35, "elapsed_time": "1 day, 3:06:59", "remaining_time": "21:52:12"} +{"current_steps": 15650, "total_steps": 28254, "loss": 0.72, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.080030628591777e-05, "epoch": 1.11, "percentage": 55.39, "elapsed_time": "1 day, 3:08:04", "remaining_time": "21:51:11"} +{"current_steps": 15660, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.077290615055325e-05, "epoch": 1.11, "percentage": 55.43, "elapsed_time": "1 day, 3:09:06", "remaining_time": "21:50:08"} +{"current_steps": 15670, "total_steps": 28254, "loss": 0.699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0745511241336787e-05, "epoch": 1.11, "percentage": 55.46, "elapsed_time": "1 day, 3:10:06", "remaining_time": "21:49:04"} +{"current_steps": 15680, "total_steps": 28254, "loss": 0.7279, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0718121592137946e-05, "epoch": 1.11, "percentage": 55.5, "elapsed_time": "1 day, 3:11:09", "remaining_time": "21:48:02"} +{"current_steps": 15690, "total_steps": 28254, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0690737236819807e-05, "epoch": 1.11, "percentage": 55.53, "elapsed_time": "1 day, 3:12:13", "remaining_time": "21:47:01"} +{"current_steps": 15700, "total_steps": 28254, "loss": 0.7168, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0663358209238877e-05, "epoch": 1.11, "percentage": 55.57, "elapsed_time": "1 day, 3:13:15", "remaining_time": "21:45:58"} +{"current_steps": 15710, "total_steps": 28254, "loss": 0.7198, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0635984543245092e-05, "epoch": 1.11, "percentage": 55.6, "elapsed_time": "1 day, 3:14:18", "remaining_time": "21:44:56"} +{"current_steps": 15720, "total_steps": 28254, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0608616272681768e-05, "epoch": 1.11, "percentage": 55.64, "elapsed_time": "1 day, 3:15:21", "remaining_time": "21:43:55"} +{"current_steps": 15730, "total_steps": 28254, "loss": 0.7136, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0581253431385546e-05, "epoch": 1.11, "percentage": 55.67, "elapsed_time": "1 day, 3:16:23", "remaining_time": "21:42:52"} +{"current_steps": 15740, "total_steps": 28254, "loss": 0.7061, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.055389605318633e-05, "epoch": 1.11, "percentage": 55.71, "elapsed_time": "1 day, 3:17:24", "remaining_time": "21:41:48"} +{"current_steps": 15750, "total_steps": 28254, "loss": 0.7266, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0526544171907293e-05, "epoch": 1.11, "percentage": 55.74, "elapsed_time": "1 day, 3:18:27", "remaining_time": "21:40:46"} +{"current_steps": 15760, "total_steps": 28254, "loss": 0.6983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0499197821364813e-05, "epoch": 1.12, "percentage": 55.78, "elapsed_time": "1 day, 3:19:32", "remaining_time": "21:39:46"} +{"current_steps": 15770, "total_steps": 28254, "loss": 0.7496, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0471857035368435e-05, "epoch": 1.12, "percentage": 55.82, "elapsed_time": "1 day, 3:20:34", "remaining_time": "21:38:43"} +{"current_steps": 15780, "total_steps": 28254, "loss": 0.7285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0444521847720797e-05, "epoch": 1.12, "percentage": 55.85, "elapsed_time": "1 day, 3:21:36", "remaining_time": "21:37:41"} +{"current_steps": 15790, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0417192292217632e-05, "epoch": 1.12, "percentage": 55.89, "elapsed_time": "1 day, 3:22:41", "remaining_time": "21:36:40"} +{"current_steps": 15800, "total_steps": 28254, "loss": 0.7189, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0389868402647725e-05, "epoch": 1.12, "percentage": 55.92, "elapsed_time": "1 day, 3:23:44", "remaining_time": "21:35:38"} +{"current_steps": 15810, "total_steps": 28254, "loss": 0.7422, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0362550212792837e-05, "epoch": 1.12, "percentage": 55.96, "elapsed_time": "1 day, 3:24:49", "remaining_time": "21:34:38"} +{"current_steps": 15820, "total_steps": 28254, "loss": 0.7565, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.033523775642768e-05, "epoch": 1.12, "percentage": 55.99, "elapsed_time": "1 day, 3:25:52", "remaining_time": "21:33:36"} +{"current_steps": 15830, "total_steps": 28254, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.030793106731988e-05, "epoch": 1.12, "percentage": 56.03, "elapsed_time": "1 day, 3:26:54", "remaining_time": "21:32:33"} +{"current_steps": 15840, "total_steps": 28254, "loss": 0.7139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0280630179229948e-05, "epoch": 1.12, "percentage": 56.06, "elapsed_time": "1 day, 3:27:57", "remaining_time": "21:31:31"} +{"current_steps": 15850, "total_steps": 28254, "loss": 0.7106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0253335125911204e-05, "epoch": 1.12, "percentage": 56.1, "elapsed_time": "1 day, 3:28:58", "remaining_time": "21:30:28"} +{"current_steps": 15860, "total_steps": 28254, "loss": 0.7057, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.022604594110978e-05, "epoch": 1.12, "percentage": 56.13, "elapsed_time": "1 day, 3:30:00", "remaining_time": "21:29:25"} +{"current_steps": 15870, "total_steps": 28254, "loss": 0.7363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0198762658564505e-05, "epoch": 1.12, "percentage": 56.17, "elapsed_time": "1 day, 3:31:03", "remaining_time": "21:28:23"} +{"current_steps": 15880, "total_steps": 28254, "loss": 0.6854, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0171485312006962e-05, "epoch": 1.12, "percentage": 56.2, "elapsed_time": "1 day, 3:32:06", "remaining_time": "21:27:21"} +{"current_steps": 15890, "total_steps": 28254, "loss": 0.7171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0144213935161353e-05, "epoch": 1.12, "percentage": 56.24, "elapsed_time": "1 day, 3:33:08", "remaining_time": "21:26:18"} +{"current_steps": 15900, "total_steps": 28254, "loss": 0.7322, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0116948561744548e-05, "epoch": 1.13, "percentage": 56.28, "elapsed_time": "1 day, 3:34:13", "remaining_time": "21:25:18"} +{"current_steps": 15910, "total_steps": 28254, "loss": 0.7034, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0089689225465942e-05, "epoch": 1.13, "percentage": 56.31, "elapsed_time": "1 day, 3:35:15", "remaining_time": "21:24:15"} +{"current_steps": 15920, "total_steps": 28254, "loss": 0.7279, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0062435960027497e-05, "epoch": 1.13, "percentage": 56.35, "elapsed_time": "1 day, 3:36:17", "remaining_time": "21:23:12"} +{"current_steps": 15930, "total_steps": 28254, "loss": 0.6928, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0035188799123657e-05, "epoch": 1.13, "percentage": 56.38, "elapsed_time": "1 day, 3:37:20", "remaining_time": "21:22:10"} +{"current_steps": 15940, "total_steps": 28254, "loss": 0.7158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0007947776441344e-05, "epoch": 1.13, "percentage": 56.42, "elapsed_time": "1 day, 3:38:25", "remaining_time": "21:21:09"} +{"current_steps": 15950, "total_steps": 28254, "loss": 0.7355, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9980712925659854e-05, "epoch": 1.13, "percentage": 56.45, "elapsed_time": "1 day, 3:39:29", "remaining_time": "21:20:08"} +{"current_steps": 15960, "total_steps": 28254, "loss": 0.7238, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9953484280450865e-05, "epoch": 1.13, "percentage": 56.49, "elapsed_time": "1 day, 3:40:30", "remaining_time": "21:19:05"} +{"current_steps": 15970, "total_steps": 28254, "loss": 0.7005, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9926261874478403e-05, "epoch": 1.13, "percentage": 56.52, "elapsed_time": "1 day, 3:41:34", "remaining_time": "21:18:04"} +{"current_steps": 15980, "total_steps": 28254, "loss": 0.7617, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9899045741398764e-05, "epoch": 1.13, "percentage": 56.56, "elapsed_time": "1 day, 3:42:38", "remaining_time": "21:17:02"} +{"current_steps": 15990, "total_steps": 28254, "loss": 0.7366, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9871835914860473e-05, "epoch": 1.13, "percentage": 56.59, "elapsed_time": "1 day, 3:43:41", "remaining_time": "21:16:01"} +{"current_steps": 16000, "total_steps": 28254, "loss": 0.7069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9844632428504282e-05, "epoch": 1.13, "percentage": 56.63, "elapsed_time": "1 day, 3:44:43", "remaining_time": "21:14:58"} +{"current_steps": 16010, "total_steps": 28254, "loss": 0.7133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.98174353159631e-05, "epoch": 1.13, "percentage": 56.66, "elapsed_time": "1 day, 3:45:47", "remaining_time": "21:13:56"} +{"current_steps": 16020, "total_steps": 28254, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9790244610861956e-05, "epoch": 1.13, "percentage": 56.7, "elapsed_time": "1 day, 3:46:46", "remaining_time": "21:12:52"} +{"current_steps": 16030, "total_steps": 28254, "loss": 0.7152, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9763060346817946e-05, "epoch": 1.13, "percentage": 56.74, "elapsed_time": "1 day, 3:47:48", "remaining_time": "21:11:49"} +{"current_steps": 16040, "total_steps": 28254, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.97358825574402e-05, "epoch": 1.14, "percentage": 56.77, "elapsed_time": "1 day, 3:48:51", "remaining_time": "21:10:47"} +{"current_steps": 16050, "total_steps": 28254, "loss": 0.7118, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9708711276329876e-05, "epoch": 1.14, "percentage": 56.81, "elapsed_time": "1 day, 3:49:52", "remaining_time": "21:09:44"} +{"current_steps": 16060, "total_steps": 28254, "loss": 0.6991, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.968154653708005e-05, "epoch": 1.14, "percentage": 56.84, "elapsed_time": "1 day, 3:50:55", "remaining_time": "21:08:41"} +{"current_steps": 16070, "total_steps": 28254, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9654388373275724e-05, "epoch": 1.14, "percentage": 56.88, "elapsed_time": "1 day, 3:51:57", "remaining_time": "21:07:39"} +{"current_steps": 16080, "total_steps": 28254, "loss": 0.7283, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9627236818493757e-05, "epoch": 1.14, "percentage": 56.91, "elapsed_time": "1 day, 3:52:59", "remaining_time": "21:06:36"} +{"current_steps": 16090, "total_steps": 28254, "loss": 0.6877, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9600091906302866e-05, "epoch": 1.14, "percentage": 56.95, "elapsed_time": "1 day, 3:54:02", "remaining_time": "21:05:34"} +{"current_steps": 16100, "total_steps": 28254, "loss": 0.6961, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9572953670263543e-05, "epoch": 1.14, "percentage": 56.98, "elapsed_time": "1 day, 3:55:04", "remaining_time": "21:04:31"} +{"current_steps": 16110, "total_steps": 28254, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9545822143927996e-05, "epoch": 1.14, "percentage": 57.02, "elapsed_time": "1 day, 3:56:06", "remaining_time": "21:03:29"} +{"current_steps": 16120, "total_steps": 28254, "loss": 0.7358, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9518697360840184e-05, "epoch": 1.14, "percentage": 57.05, "elapsed_time": "1 day, 3:57:08", "remaining_time": "21:02:26"} +{"current_steps": 16130, "total_steps": 28254, "loss": 0.7076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9491579354535704e-05, "epoch": 1.14, "percentage": 57.09, "elapsed_time": "1 day, 3:58:13", "remaining_time": "21:01:25"} +{"current_steps": 16140, "total_steps": 28254, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.946446815854177e-05, "epoch": 1.14, "percentage": 57.12, "elapsed_time": "1 day, 3:59:15", "remaining_time": "21:00:22"} +{"current_steps": 16150, "total_steps": 28254, "loss": 0.7195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9437363806377202e-05, "epoch": 1.14, "percentage": 57.16, "elapsed_time": "1 day, 4:00:18", "remaining_time": "20:59:20"} +{"current_steps": 16160, "total_steps": 28254, "loss": 0.707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9410266331552324e-05, "epoch": 1.14, "percentage": 57.2, "elapsed_time": "1 day, 4:01:19", "remaining_time": "20:58:17"} +{"current_steps": 16170, "total_steps": 28254, "loss": 0.709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9383175767568974e-05, "epoch": 1.14, "percentage": 57.23, "elapsed_time": "1 day, 4:02:24", "remaining_time": "20:57:16"} +{"current_steps": 16180, "total_steps": 28254, "loss": 0.7466, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.935609214792046e-05, "epoch": 1.15, "percentage": 57.27, "elapsed_time": "1 day, 4:03:25", "remaining_time": "20:56:13"} +{"current_steps": 16190, "total_steps": 28254, "loss": 0.7404, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.932901550609149e-05, "epoch": 1.15, "percentage": 57.3, "elapsed_time": "1 day, 4:04:28", "remaining_time": "20:55:11"} +{"current_steps": 16200, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9301945875558136e-05, "epoch": 1.15, "percentage": 57.34, "elapsed_time": "1 day, 4:05:30", "remaining_time": "20:54:08"} +{"current_steps": 16210, "total_steps": 28254, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9274883289787807e-05, "epoch": 1.15, "percentage": 57.37, "elapsed_time": "1 day, 4:06:31", "remaining_time": "20:53:05"} +{"current_steps": 16220, "total_steps": 28254, "loss": 0.6996, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.924782778223922e-05, "epoch": 1.15, "percentage": 57.41, "elapsed_time": "1 day, 4:07:35", "remaining_time": "20:52:03"} +{"current_steps": 16230, "total_steps": 28254, "loss": 0.7491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.922077938636233e-05, "epoch": 1.15, "percentage": 57.44, "elapsed_time": "1 day, 4:08:39", "remaining_time": "20:51:02"} +{"current_steps": 16240, "total_steps": 28254, "loss": 0.7379, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.919373813559828e-05, "epoch": 1.15, "percentage": 57.48, "elapsed_time": "1 day, 4:09:42", "remaining_time": "20:50:00"} +{"current_steps": 16250, "total_steps": 28254, "loss": 0.711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9166704063379398e-05, "epoch": 1.15, "percentage": 57.51, "elapsed_time": "1 day, 4:10:46", "remaining_time": "20:48:59"} +{"current_steps": 16260, "total_steps": 28254, "loss": 0.7174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9139677203129146e-05, "epoch": 1.15, "percentage": 57.55, "elapsed_time": "1 day, 4:11:49", "remaining_time": "20:47:57"} +{"current_steps": 16270, "total_steps": 28254, "loss": 0.7062, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9112657588262064e-05, "epoch": 1.15, "percentage": 57.58, "elapsed_time": "1 day, 4:12:51", "remaining_time": "20:46:54"} +{"current_steps": 16280, "total_steps": 28254, "loss": 0.7164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9085645252183716e-05, "epoch": 1.15, "percentage": 57.62, "elapsed_time": "1 day, 4:13:52", "remaining_time": "20:45:51"} +{"current_steps": 16290, "total_steps": 28254, "loss": 0.6892, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.905864022829067e-05, "epoch": 1.15, "percentage": 57.66, "elapsed_time": "1 day, 4:14:55", "remaining_time": "20:44:49"} +{"current_steps": 16300, "total_steps": 28254, "loss": 0.7483, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9031642549970484e-05, "epoch": 1.15, "percentage": 57.69, "elapsed_time": "1 day, 4:15:58", "remaining_time": "20:43:47"} +{"current_steps": 16310, "total_steps": 28254, "loss": 0.7138, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9004652250601612e-05, "epoch": 1.15, "percentage": 57.73, "elapsed_time": "1 day, 4:17:01", "remaining_time": "20:42:45"} +{"current_steps": 16320, "total_steps": 28254, "loss": 0.7318, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.897766936355337e-05, "epoch": 1.16, "percentage": 57.76, "elapsed_time": "1 day, 4:18:02", "remaining_time": "20:41:41"} +{"current_steps": 16330, "total_steps": 28254, "loss": 0.7191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8950693922185938e-05, "epoch": 1.16, "percentage": 57.8, "elapsed_time": "1 day, 4:19:05", "remaining_time": "20:40:39"} +{"current_steps": 16340, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.892372595985028e-05, "epoch": 1.16, "percentage": 57.83, "elapsed_time": "1 day, 4:20:06", "remaining_time": "20:39:36"} +{"current_steps": 16350, "total_steps": 28254, "loss": 0.6814, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8896765509888114e-05, "epoch": 1.16, "percentage": 57.87, "elapsed_time": "1 day, 4:21:11", "remaining_time": "20:38:35"} +{"current_steps": 16360, "total_steps": 28254, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8869812605631854e-05, "epoch": 1.16, "percentage": 57.9, "elapsed_time": "1 day, 4:22:12", "remaining_time": "20:37:32"} +{"current_steps": 16370, "total_steps": 28254, "loss": 0.7421, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8842867280404614e-05, "epoch": 1.16, "percentage": 57.94, "elapsed_time": "1 day, 4:23:15", "remaining_time": "20:36:30"} +{"current_steps": 16380, "total_steps": 28254, "loss": 0.7249, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8815929567520118e-05, "epoch": 1.16, "percentage": 57.97, "elapsed_time": "1 day, 4:24:18", "remaining_time": "20:35:27"} +{"current_steps": 16390, "total_steps": 28254, "loss": 0.7133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.878899950028269e-05, "epoch": 1.16, "percentage": 58.01, "elapsed_time": "1 day, 4:25:21", "remaining_time": "20:34:26"} +{"current_steps": 16400, "total_steps": 28254, "loss": 0.7258, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.876207711198718e-05, "epoch": 1.16, "percentage": 58.04, "elapsed_time": "1 day, 4:26:22", "remaining_time": "20:33:22"} +{"current_steps": 16410, "total_steps": 28254, "loss": 0.7109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.873516243591897e-05, "epoch": 1.16, "percentage": 58.08, "elapsed_time": "1 day, 4:27:24", "remaining_time": "20:32:19"} +{"current_steps": 16420, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.870825550535389e-05, "epoch": 1.16, "percentage": 58.12, "elapsed_time": "1 day, 4:28:26", "remaining_time": "20:31:17"} +{"current_steps": 16430, "total_steps": 28254, "loss": 0.7491, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8681356353558203e-05, "epoch": 1.16, "percentage": 58.15, "elapsed_time": "1 day, 4:29:29", "remaining_time": "20:30:15"} +{"current_steps": 16440, "total_steps": 28254, "loss": 0.7171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8654465013788565e-05, "epoch": 1.16, "percentage": 58.19, "elapsed_time": "1 day, 4:30:29", "remaining_time": "20:29:11"} +{"current_steps": 16450, "total_steps": 28254, "loss": 0.7179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.862758151929194e-05, "epoch": 1.16, "percentage": 58.22, "elapsed_time": "1 day, 4:31:30", "remaining_time": "20:28:07"} +{"current_steps": 16460, "total_steps": 28254, "loss": 0.6968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.860070590330562e-05, "epoch": 1.17, "percentage": 58.26, "elapsed_time": "1 day, 4:32:33", "remaining_time": "20:27:05"} +{"current_steps": 16470, "total_steps": 28254, "loss": 0.6621, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.857383819905715e-05, "epoch": 1.17, "percentage": 58.29, "elapsed_time": "1 day, 4:33:36", "remaining_time": "20:26:03"} +{"current_steps": 16480, "total_steps": 28254, "loss": 0.7086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.85469784397643e-05, "epoch": 1.17, "percentage": 58.33, "elapsed_time": "1 day, 4:34:39", "remaining_time": "20:25:01"} +{"current_steps": 16490, "total_steps": 28254, "loss": 0.747, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8520126658635e-05, "epoch": 1.17, "percentage": 58.36, "elapsed_time": "1 day, 4:35:42", "remaining_time": "20:23:59"} +{"current_steps": 16500, "total_steps": 28254, "loss": 0.7053, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.849328288886732e-05, "epoch": 1.17, "percentage": 58.4, "elapsed_time": "1 day, 4:36:46", "remaining_time": "20:22:57"} +{"current_steps": 16510, "total_steps": 28254, "loss": 0.7356, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8466447163649447e-05, "epoch": 1.17, "percentage": 58.43, "elapsed_time": "1 day, 4:37:47", "remaining_time": "20:21:54"} +{"current_steps": 16520, "total_steps": 28254, "loss": 0.7242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8439619516159605e-05, "epoch": 1.17, "percentage": 58.47, "elapsed_time": "1 day, 4:38:48", "remaining_time": "20:20:51"} +{"current_steps": 16530, "total_steps": 28254, "loss": 0.7214, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.841279997956602e-05, "epoch": 1.17, "percentage": 58.5, "elapsed_time": "1 day, 4:39:50", "remaining_time": "20:19:48"} +{"current_steps": 16540, "total_steps": 28254, "loss": 0.7189, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8385988587026908e-05, "epoch": 1.17, "percentage": 58.54, "elapsed_time": "1 day, 4:40:52", "remaining_time": "20:18:46"} +{"current_steps": 16550, "total_steps": 28254, "loss": 0.7264, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8359185371690418e-05, "epoch": 1.17, "percentage": 58.58, "elapsed_time": "1 day, 4:41:55", "remaining_time": "20:17:43"} +{"current_steps": 16560, "total_steps": 28254, "loss": 0.7173, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8332390366694587e-05, "epoch": 1.17, "percentage": 58.61, "elapsed_time": "1 day, 4:42:57", "remaining_time": "20:16:40"} +{"current_steps": 16570, "total_steps": 28254, "loss": 0.7327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8305603605167268e-05, "epoch": 1.17, "percentage": 58.65, "elapsed_time": "1 day, 4:43:59", "remaining_time": "20:15:38"} +{"current_steps": 16580, "total_steps": 28254, "loss": 0.6935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.827882512022618e-05, "epoch": 1.17, "percentage": 58.68, "elapsed_time": "1 day, 4:45:03", "remaining_time": "20:14:37"} +{"current_steps": 16590, "total_steps": 28254, "loss": 0.7185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.825205494497877e-05, "epoch": 1.17, "percentage": 58.72, "elapsed_time": "1 day, 4:46:04", "remaining_time": "20:13:33"} +{"current_steps": 16600, "total_steps": 28254, "loss": 0.7138, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8225293112522222e-05, "epoch": 1.17, "percentage": 58.75, "elapsed_time": "1 day, 4:47:07", "remaining_time": "20:12:31"} +{"current_steps": 16610, "total_steps": 28254, "loss": 0.6779, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.819853965594339e-05, "epoch": 1.18, "percentage": 58.79, "elapsed_time": "1 day, 4:48:10", "remaining_time": "20:11:29"} +{"current_steps": 16620, "total_steps": 28254, "loss": 0.7251, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8171794608318813e-05, "epoch": 1.18, "percentage": 58.82, "elapsed_time": "1 day, 4:49:11", "remaining_time": "20:10:26"} +{"current_steps": 16630, "total_steps": 28254, "loss": 0.7461, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8145058002714587e-05, "epoch": 1.18, "percentage": 58.86, "elapsed_time": "1 day, 4:50:14", "remaining_time": "20:09:23"} +{"current_steps": 16640, "total_steps": 28254, "loss": 0.7335, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8118329872186412e-05, "epoch": 1.18, "percentage": 58.89, "elapsed_time": "1 day, 4:51:18", "remaining_time": "20:08:22"} +{"current_steps": 16650, "total_steps": 28254, "loss": 0.6869, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.809161024977946e-05, "epoch": 1.18, "percentage": 58.93, "elapsed_time": "1 day, 4:52:21", "remaining_time": "20:07:20"} +{"current_steps": 16660, "total_steps": 28254, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8064899168528438e-05, "epoch": 1.18, "percentage": 58.97, "elapsed_time": "1 day, 4:53:22", "remaining_time": "20:06:17"} +{"current_steps": 16670, "total_steps": 28254, "loss": 0.7197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8038196661457456e-05, "epoch": 1.18, "percentage": 59.0, "elapsed_time": "1 day, 4:54:25", "remaining_time": "20:05:15"} +{"current_steps": 16680, "total_steps": 28254, "loss": 0.6936, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8011502761580056e-05, "epoch": 1.18, "percentage": 59.04, "elapsed_time": "1 day, 4:55:28", "remaining_time": "20:04:13"} +{"current_steps": 16690, "total_steps": 28254, "loss": 0.7115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7984817501899084e-05, "epoch": 1.18, "percentage": 59.07, "elapsed_time": "1 day, 4:56:27", "remaining_time": "20:03:08"} +{"current_steps": 16700, "total_steps": 28254, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7958140915406764e-05, "epoch": 1.18, "percentage": 59.11, "elapsed_time": "1 day, 4:57:32", "remaining_time": "20:02:08"} +{"current_steps": 16710, "total_steps": 28254, "loss": 0.7225, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.793147303508456e-05, "epoch": 1.18, "percentage": 59.14, "elapsed_time": "1 day, 4:58:34", "remaining_time": "20:01:05"} +{"current_steps": 16720, "total_steps": 28254, "loss": 0.7156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7904813893903194e-05, "epoch": 1.18, "percentage": 59.18, "elapsed_time": "1 day, 4:59:36", "remaining_time": "20:00:02"} +{"current_steps": 16730, "total_steps": 28254, "loss": 0.7347, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7878163524822566e-05, "epoch": 1.18, "percentage": 59.21, "elapsed_time": "1 day, 5:00:38", "remaining_time": "19:58:59"} +{"current_steps": 16740, "total_steps": 28254, "loss": 0.7126, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.785152196079174e-05, "epoch": 1.18, "percentage": 59.25, "elapsed_time": "1 day, 5:01:40", "remaining_time": "19:57:56"} +{"current_steps": 16750, "total_steps": 28254, "loss": 0.6867, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7824889234748875e-05, "epoch": 1.19, "percentage": 59.28, "elapsed_time": "1 day, 5:02:43", "remaining_time": "19:56:54"} +{"current_steps": 16760, "total_steps": 28254, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7798265379621244e-05, "epoch": 1.19, "percentage": 59.32, "elapsed_time": "1 day, 5:03:48", "remaining_time": "19:55:54"} +{"current_steps": 16770, "total_steps": 28254, "loss": 0.6824, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.777165042832512e-05, "epoch": 1.19, "percentage": 59.35, "elapsed_time": "1 day, 5:04:50", "remaining_time": "19:54:51"} +{"current_steps": 16780, "total_steps": 28254, "loss": 0.7436, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7745044413765766e-05, "epoch": 1.19, "percentage": 59.39, "elapsed_time": "1 day, 5:05:51", "remaining_time": "19:53:47"} +{"current_steps": 16790, "total_steps": 28254, "loss": 0.7273, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7718447368837415e-05, "epoch": 1.19, "percentage": 59.43, "elapsed_time": "1 day, 5:06:52", "remaining_time": "19:52:44"} +{"current_steps": 16800, "total_steps": 28254, "loss": 0.7286, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7691859326423198e-05, "epoch": 1.19, "percentage": 59.46, "elapsed_time": "1 day, 5:07:55", "remaining_time": "19:51:42"} +{"current_steps": 16810, "total_steps": 28254, "loss": 0.7027, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.766528031939513e-05, "epoch": 1.19, "percentage": 59.5, "elapsed_time": "1 day, 5:08:59", "remaining_time": "19:50:41"} +{"current_steps": 16820, "total_steps": 28254, "loss": 0.7411, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7638710380614016e-05, "epoch": 1.19, "percentage": 59.53, "elapsed_time": "1 day, 5:10:02", "remaining_time": "19:49:39"} +{"current_steps": 16830, "total_steps": 28254, "loss": 0.7129, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7612149542929506e-05, "epoch": 1.19, "percentage": 59.57, "elapsed_time": "1 day, 5:11:06", "remaining_time": "19:48:38"} +{"current_steps": 16840, "total_steps": 28254, "loss": 0.7052, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.758559783917996e-05, "epoch": 1.19, "percentage": 59.6, "elapsed_time": "1 day, 5:12:08", "remaining_time": "19:47:35"} +{"current_steps": 16850, "total_steps": 28254, "loss": 0.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7559055302192458e-05, "epoch": 1.19, "percentage": 59.64, "elapsed_time": "1 day, 5:13:10", "remaining_time": "19:46:32"} +{"current_steps": 16860, "total_steps": 28254, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.753252196478273e-05, "epoch": 1.19, "percentage": 59.67, "elapsed_time": "1 day, 5:14:14", "remaining_time": "19:45:30"} +{"current_steps": 16870, "total_steps": 28254, "loss": 0.7196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7505997859755162e-05, "epoch": 1.19, "percentage": 59.71, "elapsed_time": "1 day, 5:15:17", "remaining_time": "19:44:28"} +{"current_steps": 16880, "total_steps": 28254, "loss": 0.7339, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7479483019902697e-05, "epoch": 1.19, "percentage": 59.74, "elapsed_time": "1 day, 5:16:18", "remaining_time": "19:43:25"} +{"current_steps": 16890, "total_steps": 28254, "loss": 0.7055, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.745297747800686e-05, "epoch": 1.2, "percentage": 59.78, "elapsed_time": "1 day, 5:17:20", "remaining_time": "19:42:23"} +{"current_steps": 16900, "total_steps": 28254, "loss": 0.7098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.742648126683762e-05, "epoch": 1.2, "percentage": 59.81, "elapsed_time": "1 day, 5:18:22", "remaining_time": "19:41:20"} +{"current_steps": 16910, "total_steps": 28254, "loss": 0.7118, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.739999441915347e-05, "epoch": 1.2, "percentage": 59.85, "elapsed_time": "1 day, 5:19:26", "remaining_time": "19:40:18"} +{"current_steps": 16920, "total_steps": 28254, "loss": 0.7336, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.737351696770129e-05, "epoch": 1.2, "percentage": 59.89, "elapsed_time": "1 day, 5:20:30", "remaining_time": "19:39:17"} +{"current_steps": 16930, "total_steps": 28254, "loss": 0.68, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.734704894521637e-05, "epoch": 1.2, "percentage": 59.92, "elapsed_time": "1 day, 5:21:33", "remaining_time": "19:38:15"} +{"current_steps": 16940, "total_steps": 28254, "loss": 0.7092, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7320590384422316e-05, "epoch": 1.2, "percentage": 59.96, "elapsed_time": "1 day, 5:22:36", "remaining_time": "19:37:13"} +{"current_steps": 16950, "total_steps": 28254, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7294141318031053e-05, "epoch": 1.2, "percentage": 59.99, "elapsed_time": "1 day, 5:23:38", "remaining_time": "19:36:10"} +{"current_steps": 16960, "total_steps": 28254, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7267701778742752e-05, "epoch": 1.2, "percentage": 60.03, "elapsed_time": "1 day, 5:24:41", "remaining_time": "19:35:08"} +{"current_steps": 16970, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.724127179924584e-05, "epoch": 1.2, "percentage": 60.06, "elapsed_time": "1 day, 5:25:43", "remaining_time": "19:34:06"} +{"current_steps": 16980, "total_steps": 28254, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7214851412216877e-05, "epoch": 1.2, "percentage": 60.1, "elapsed_time": "1 day, 5:26:46", "remaining_time": "19:33:03"} +{"current_steps": 16990, "total_steps": 28254, "loss": 0.7324, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7188440650320596e-05, "epoch": 1.2, "percentage": 60.13, "elapsed_time": "1 day, 5:27:48", "remaining_time": "19:32:00"} +{"current_steps": 17000, "total_steps": 28254, "loss": 0.7048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.716203954620982e-05, "epoch": 1.2, "percentage": 60.17, "elapsed_time": "1 day, 5:28:50", "remaining_time": "19:30:58"} +{"current_steps": 17010, "total_steps": 28254, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7135648132525434e-05, "epoch": 1.2, "percentage": 60.2, "elapsed_time": "1 day, 5:29:54", "remaining_time": "19:29:56"} +{"current_steps": 17020, "total_steps": 28254, "loss": 0.7062, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7109266441896346e-05, "epoch": 1.2, "percentage": 60.24, "elapsed_time": "1 day, 5:30:57", "remaining_time": "19:28:55"} +{"current_steps": 17030, "total_steps": 28254, "loss": 0.7157, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7082894506939423e-05, "epoch": 1.21, "percentage": 60.27, "elapsed_time": "1 day, 5:32:00", "remaining_time": "19:27:53"} +{"current_steps": 17040, "total_steps": 28254, "loss": 0.72, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7056532360259504e-05, "epoch": 1.21, "percentage": 60.31, "elapsed_time": "1 day, 5:33:03", "remaining_time": "19:26:50"} +{"current_steps": 17050, "total_steps": 28254, "loss": 0.7024, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7030180034449294e-05, "epoch": 1.21, "percentage": 60.35, "elapsed_time": "1 day, 5:34:05", "remaining_time": "19:25:48"} +{"current_steps": 17060, "total_steps": 28254, "loss": 0.7169, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.700383756208938e-05, "epoch": 1.21, "percentage": 60.38, "elapsed_time": "1 day, 5:35:08", "remaining_time": "19:24:46"} +{"current_steps": 17070, "total_steps": 28254, "loss": 0.7279, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6977504975748147e-05, "epoch": 1.21, "percentage": 60.42, "elapsed_time": "1 day, 5:36:11", "remaining_time": "19:23:44"} +{"current_steps": 17080, "total_steps": 28254, "loss": 0.6765, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.695118230798177e-05, "epoch": 1.21, "percentage": 60.45, "elapsed_time": "1 day, 5:37:13", "remaining_time": "19:22:41"} +{"current_steps": 17090, "total_steps": 28254, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6924869591334168e-05, "epoch": 1.21, "percentage": 60.49, "elapsed_time": "1 day, 5:38:15", "remaining_time": "19:21:38"} +{"current_steps": 17100, "total_steps": 28254, "loss": 0.7001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6898566858336942e-05, "epoch": 1.21, "percentage": 60.52, "elapsed_time": "1 day, 5:39:17", "remaining_time": "19:20:36"} +{"current_steps": 17110, "total_steps": 28254, "loss": 0.7301, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6872274141509342e-05, "epoch": 1.21, "percentage": 60.56, "elapsed_time": "1 day, 5:40:20", "remaining_time": "19:19:33"} +{"current_steps": 17120, "total_steps": 28254, "loss": 0.6949, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6845991473358264e-05, "epoch": 1.21, "percentage": 60.59, "elapsed_time": "1 day, 5:41:21", "remaining_time": "19:18:30"} +{"current_steps": 17130, "total_steps": 28254, "loss": 0.7389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.681971888637815e-05, "epoch": 1.21, "percentage": 60.63, "elapsed_time": "1 day, 5:42:23", "remaining_time": "19:17:27"} +{"current_steps": 17140, "total_steps": 28254, "loss": 0.7423, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6793456413051016e-05, "epoch": 1.21, "percentage": 60.66, "elapsed_time": "1 day, 5:43:25", "remaining_time": "19:16:25"} +{"current_steps": 17150, "total_steps": 28254, "loss": 0.7027, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6767204085846324e-05, "epoch": 1.21, "percentage": 60.7, "elapsed_time": "1 day, 5:44:28", "remaining_time": "19:15:23"} +{"current_steps": 17160, "total_steps": 28254, "loss": 0.7062, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.674096193722103e-05, "epoch": 1.21, "percentage": 60.73, "elapsed_time": "1 day, 5:45:31", "remaining_time": "19:14:20"} +{"current_steps": 17170, "total_steps": 28254, "loss": 0.722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.671472999961949e-05, "epoch": 1.22, "percentage": 60.77, "elapsed_time": "1 day, 5:46:34", "remaining_time": "19:13:18"} +{"current_steps": 17180, "total_steps": 28254, "loss": 0.7278, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.668850830547345e-05, "epoch": 1.22, "percentage": 60.81, "elapsed_time": "1 day, 5:47:35", "remaining_time": "19:12:15"} +{"current_steps": 17190, "total_steps": 28254, "loss": 0.7161, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6662296887201967e-05, "epoch": 1.22, "percentage": 60.84, "elapsed_time": "1 day, 5:48:36", "remaining_time": "19:11:12"} +{"current_steps": 17200, "total_steps": 28254, "loss": 0.7267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6636095777211413e-05, "epoch": 1.22, "percentage": 60.88, "elapsed_time": "1 day, 5:49:40", "remaining_time": "19:10:10"} +{"current_steps": 17210, "total_steps": 28254, "loss": 0.7356, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.660990500789541e-05, "epoch": 1.22, "percentage": 60.91, "elapsed_time": "1 day, 5:50:40", "remaining_time": "19:09:06"} +{"current_steps": 17220, "total_steps": 28254, "loss": 0.7245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6583724611634804e-05, "epoch": 1.22, "percentage": 60.95, "elapsed_time": "1 day, 5:51:45", "remaining_time": "19:08:06"} +{"current_steps": 17230, "total_steps": 28254, "loss": 0.6979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6557554620797596e-05, "epoch": 1.22, "percentage": 60.98, "elapsed_time": "1 day, 5:52:50", "remaining_time": "19:07:04"} +{"current_steps": 17240, "total_steps": 28254, "loss": 0.6995, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6531395067738934e-05, "epoch": 1.22, "percentage": 61.02, "elapsed_time": "1 day, 5:53:53", "remaining_time": "19:06:03"} +{"current_steps": 17250, "total_steps": 28254, "loss": 0.72, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.650524598480106e-05, "epoch": 1.22, "percentage": 61.05, "elapsed_time": "1 day, 5:54:54", "remaining_time": "19:04:59"} +{"current_steps": 17260, "total_steps": 28254, "loss": 0.7217, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.647910740431329e-05, "epoch": 1.22, "percentage": 61.09, "elapsed_time": "1 day, 5:55:57", "remaining_time": "19:03:57"} +{"current_steps": 17270, "total_steps": 28254, "loss": 0.7191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.645297935859192e-05, "epoch": 1.22, "percentage": 61.12, "elapsed_time": "1 day, 5:56:59", "remaining_time": "19:02:55"} +{"current_steps": 17280, "total_steps": 28254, "loss": 0.7095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6426861879940235e-05, "epoch": 1.22, "percentage": 61.16, "elapsed_time": "1 day, 5:58:03", "remaining_time": "19:01:53"} +{"current_steps": 17290, "total_steps": 28254, "loss": 0.7315, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.640075500064848e-05, "epoch": 1.22, "percentage": 61.19, "elapsed_time": "1 day, 5:59:05", "remaining_time": "19:00:51"} +{"current_steps": 17300, "total_steps": 28254, "loss": 0.7221, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.637465875299376e-05, "epoch": 1.22, "percentage": 61.23, "elapsed_time": "1 day, 6:00:08", "remaining_time": "18:59:48"} +{"current_steps": 17310, "total_steps": 28254, "loss": 0.7424, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.634857316924006e-05, "epoch": 1.23, "percentage": 61.27, "elapsed_time": "1 day, 6:01:11", "remaining_time": "18:58:46"} +{"current_steps": 17320, "total_steps": 28254, "loss": 0.7475, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.632249828163816e-05, "epoch": 1.23, "percentage": 61.3, "elapsed_time": "1 day, 6:02:10", "remaining_time": "18:57:42"} +{"current_steps": 17330, "total_steps": 28254, "loss": 0.7208, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6296434122425638e-05, "epoch": 1.23, "percentage": 61.34, "elapsed_time": "1 day, 6:03:13", "remaining_time": "18:56:39"} +{"current_steps": 17340, "total_steps": 28254, "loss": 0.7181, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.627038072382679e-05, "epoch": 1.23, "percentage": 61.37, "elapsed_time": "1 day, 6:04:15", "remaining_time": "18:55:37"} +{"current_steps": 17350, "total_steps": 28254, "loss": 0.7212, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6244338118052632e-05, "epoch": 1.23, "percentage": 61.41, "elapsed_time": "1 day, 6:05:16", "remaining_time": "18:54:33"} +{"current_steps": 17360, "total_steps": 28254, "loss": 0.7071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.621830633730079e-05, "epoch": 1.23, "percentage": 61.44, "elapsed_time": "1 day, 6:06:20", "remaining_time": "18:53:32"} +{"current_steps": 17370, "total_steps": 28254, "loss": 0.7225, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6192285413755564e-05, "epoch": 1.23, "percentage": 61.48, "elapsed_time": "1 day, 6:07:22", "remaining_time": "18:52:29"} +{"current_steps": 17380, "total_steps": 28254, "loss": 0.717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6166275379587786e-05, "epoch": 1.23, "percentage": 61.51, "elapsed_time": "1 day, 6:08:26", "remaining_time": "18:51:28"} +{"current_steps": 17390, "total_steps": 28254, "loss": 0.7502, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6140276266954864e-05, "epoch": 1.23, "percentage": 61.55, "elapsed_time": "1 day, 6:09:30", "remaining_time": "18:50:26"} +{"current_steps": 17400, "total_steps": 28254, "loss": 0.7212, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.611428810800065e-05, "epoch": 1.23, "percentage": 61.58, "elapsed_time": "1 day, 6:10:34", "remaining_time": "18:49:25"} +{"current_steps": 17410, "total_steps": 28254, "loss": 0.7458, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.608831093485551e-05, "epoch": 1.23, "percentage": 61.62, "elapsed_time": "1 day, 6:11:38", "remaining_time": "18:48:24"} +{"current_steps": 17420, "total_steps": 28254, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.606234477963619e-05, "epoch": 1.23, "percentage": 61.65, "elapsed_time": "1 day, 6:12:40", "remaining_time": "18:47:21"} +{"current_steps": 17430, "total_steps": 28254, "loss": 0.7317, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6036389674445838e-05, "epoch": 1.23, "percentage": 61.69, "elapsed_time": "1 day, 6:13:41", "remaining_time": "18:46:17"} +{"current_steps": 17440, "total_steps": 28254, "loss": 0.7232, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6010445651373918e-05, "epoch": 1.23, "percentage": 61.73, "elapsed_time": "1 day, 6:14:42", "remaining_time": "18:45:14"} +{"current_steps": 17450, "total_steps": 28254, "loss": 0.6824, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.598451274249621e-05, "epoch": 1.24, "percentage": 61.76, "elapsed_time": "1 day, 6:15:44", "remaining_time": "18:44:11"} +{"current_steps": 17460, "total_steps": 28254, "loss": 0.6917, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5958590979874733e-05, "epoch": 1.24, "percentage": 61.8, "elapsed_time": "1 day, 6:16:49", "remaining_time": "18:43:10"} +{"current_steps": 17470, "total_steps": 28254, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5932680395557765e-05, "epoch": 1.24, "percentage": 61.83, "elapsed_time": "1 day, 6:17:50", "remaining_time": "18:42:08"} +{"current_steps": 17480, "total_steps": 28254, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.590678102157972e-05, "epoch": 1.24, "percentage": 61.87, "elapsed_time": "1 day, 6:18:53", "remaining_time": "18:41:05"} +{"current_steps": 17490, "total_steps": 28254, "loss": 0.7067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5880892889961164e-05, "epoch": 1.24, "percentage": 61.9, "elapsed_time": "1 day, 6:19:53", "remaining_time": "18:40:02"} +{"current_steps": 17500, "total_steps": 28254, "loss": 0.7049, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5855016032708787e-05, "epoch": 1.24, "percentage": 61.94, "elapsed_time": "1 day, 6:20:55", "remaining_time": "18:38:59"} +{"current_steps": 17510, "total_steps": 28254, "loss": 0.703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.582915048181532e-05, "epoch": 1.24, "percentage": 61.97, "elapsed_time": "1 day, 6:22:00", "remaining_time": "18:37:57"} +{"current_steps": 17520, "total_steps": 28254, "loss": 0.7269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5803296269259503e-05, "epoch": 1.24, "percentage": 62.01, "elapsed_time": "1 day, 6:23:00", "remaining_time": "18:36:54"} +{"current_steps": 17530, "total_steps": 28254, "loss": 0.6889, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5777453427006084e-05, "epoch": 1.24, "percentage": 62.04, "elapsed_time": "1 day, 6:24:03", "remaining_time": "18:35:52"} +{"current_steps": 17540, "total_steps": 28254, "loss": 0.7249, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5751621987005742e-05, "epoch": 1.24, "percentage": 62.08, "elapsed_time": "1 day, 6:25:06", "remaining_time": "18:34:49"} +{"current_steps": 17550, "total_steps": 28254, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5725801981195062e-05, "epoch": 1.24, "percentage": 62.12, "elapsed_time": "1 day, 6:26:08", "remaining_time": "18:33:47"} +{"current_steps": 17560, "total_steps": 28254, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.569999344149648e-05, "epoch": 1.24, "percentage": 62.15, "elapsed_time": "1 day, 6:27:09", "remaining_time": "18:32:44"} +{"current_steps": 17570, "total_steps": 28254, "loss": 0.7149, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.567419639981827e-05, "epoch": 1.24, "percentage": 62.19, "elapsed_time": "1 day, 6:28:13", "remaining_time": "18:31:42"} +{"current_steps": 17580, "total_steps": 28254, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5648410888054487e-05, "epoch": 1.24, "percentage": 62.22, "elapsed_time": "1 day, 6:29:15", "remaining_time": "18:30:39"} +{"current_steps": 17590, "total_steps": 28254, "loss": 0.7139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5622636938084927e-05, "epoch": 1.25, "percentage": 62.26, "elapsed_time": "1 day, 6:30:18", "remaining_time": "18:29:37"} +{"current_steps": 17600, "total_steps": 28254, "loss": 0.722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5596874581775112e-05, "epoch": 1.25, "percentage": 62.29, "elapsed_time": "1 day, 6:31:18", "remaining_time": "18:28:34"} +{"current_steps": 17610, "total_steps": 28254, "loss": 0.6979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5571123850976184e-05, "epoch": 1.25, "percentage": 62.33, "elapsed_time": "1 day, 6:32:22", "remaining_time": "18:27:32"} +{"current_steps": 17620, "total_steps": 28254, "loss": 0.7257, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5545384777524958e-05, "epoch": 1.25, "percentage": 62.36, "elapsed_time": "1 day, 6:33:26", "remaining_time": "18:26:31"} +{"current_steps": 17630, "total_steps": 28254, "loss": 0.7112, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.551965739324381e-05, "epoch": 1.25, "percentage": 62.4, "elapsed_time": "1 day, 6:34:28", "remaining_time": "18:25:28"} +{"current_steps": 17640, "total_steps": 28254, "loss": 0.7098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.549394172994069e-05, "epoch": 1.25, "percentage": 62.43, "elapsed_time": "1 day, 6:35:31", "remaining_time": "18:24:26"} +{"current_steps": 17650, "total_steps": 28254, "loss": 0.7094, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5468237819409028e-05, "epoch": 1.25, "percentage": 62.47, "elapsed_time": "1 day, 6:36:33", "remaining_time": "18:23:23"} +{"current_steps": 17660, "total_steps": 28254, "loss": 0.7317, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5442545693427733e-05, "epoch": 1.25, "percentage": 62.5, "elapsed_time": "1 day, 6:37:35", "remaining_time": "18:22:20"} +{"current_steps": 17670, "total_steps": 28254, "loss": 0.6859, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5416865383761147e-05, "epoch": 1.25, "percentage": 62.54, "elapsed_time": "1 day, 6:38:38", "remaining_time": "18:21:18"} +{"current_steps": 17680, "total_steps": 28254, "loss": 0.7187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.539119692215902e-05, "epoch": 1.25, "percentage": 62.58, "elapsed_time": "1 day, 6:39:41", "remaining_time": "18:20:16"} +{"current_steps": 17690, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5365540340356415e-05, "epoch": 1.25, "percentage": 62.61, "elapsed_time": "1 day, 6:40:47", "remaining_time": "18:19:16"} +{"current_steps": 17700, "total_steps": 28254, "loss": 0.6882, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.533989567007374e-05, "epoch": 1.25, "percentage": 62.65, "elapsed_time": "1 day, 6:41:49", "remaining_time": "18:18:13"} +{"current_steps": 17710, "total_steps": 28254, "loss": 0.6969, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5314262943016654e-05, "epoch": 1.25, "percentage": 62.68, "elapsed_time": "1 day, 6:42:51", "remaining_time": "18:17:10"} +{"current_steps": 17720, "total_steps": 28254, "loss": 0.6984, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5288642190876086e-05, "epoch": 1.25, "percentage": 62.72, "elapsed_time": "1 day, 6:43:52", "remaining_time": "18:16:07"} +{"current_steps": 17730, "total_steps": 28254, "loss": 0.7349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.526303344532811e-05, "epoch": 1.25, "percentage": 62.75, "elapsed_time": "1 day, 6:44:55", "remaining_time": "18:15:05"} +{"current_steps": 17740, "total_steps": 28254, "loss": 0.7341, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5237436738033984e-05, "epoch": 1.26, "percentage": 62.79, "elapsed_time": "1 day, 6:45:59", "remaining_time": "18:14:03"} +{"current_steps": 17750, "total_steps": 28254, "loss": 0.7143, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5211852100640095e-05, "epoch": 1.26, "percentage": 62.82, "elapsed_time": "1 day, 6:47:01", "remaining_time": "18:13:01"} +{"current_steps": 17760, "total_steps": 28254, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5186279564777883e-05, "epoch": 1.26, "percentage": 62.86, "elapsed_time": "1 day, 6:48:03", "remaining_time": "18:11:58"} +{"current_steps": 17770, "total_steps": 28254, "loss": 0.6913, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.516071916206383e-05, "epoch": 1.26, "percentage": 62.89, "elapsed_time": "1 day, 6:49:05", "remaining_time": "18:10:55"} +{"current_steps": 17780, "total_steps": 28254, "loss": 0.7063, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5135170924099423e-05, "epoch": 1.26, "percentage": 62.93, "elapsed_time": "1 day, 6:50:06", "remaining_time": "18:09:52"} +{"current_steps": 17790, "total_steps": 28254, "loss": 0.7095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5109634882471118e-05, "epoch": 1.26, "percentage": 62.96, "elapsed_time": "1 day, 6:51:12", "remaining_time": "18:08:52"} +{"current_steps": 17800, "total_steps": 28254, "loss": 0.7047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5084111068750283e-05, "epoch": 1.26, "percentage": 63.0, "elapsed_time": "1 day, 6:52:18", "remaining_time": "18:07:51"} +{"current_steps": 17810, "total_steps": 28254, "loss": 0.7433, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5058599514493158e-05, "epoch": 1.26, "percentage": 63.04, "elapsed_time": "1 day, 6:53:19", "remaining_time": "18:06:48"} +{"current_steps": 17820, "total_steps": 28254, "loss": 0.6966, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5033100251240833e-05, "epoch": 1.26, "percentage": 63.07, "elapsed_time": "1 day, 6:54:19", "remaining_time": "18:05:45"} +{"current_steps": 17830, "total_steps": 28254, "loss": 0.7162, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.500761331051922e-05, "epoch": 1.26, "percentage": 63.11, "elapsed_time": "1 day, 6:55:22", "remaining_time": "18:04:43"} +{"current_steps": 17840, "total_steps": 28254, "loss": 0.7126, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4982138723838973e-05, "epoch": 1.26, "percentage": 63.14, "elapsed_time": "1 day, 6:56:26", "remaining_time": "18:03:40"} +{"current_steps": 17850, "total_steps": 28254, "loss": 0.6977, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4956676522695478e-05, "epoch": 1.26, "percentage": 63.18, "elapsed_time": "1 day, 6:57:29", "remaining_time": "18:02:39"} +{"current_steps": 17860, "total_steps": 28254, "loss": 0.6931, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.493122673856881e-05, "epoch": 1.26, "percentage": 63.21, "elapsed_time": "1 day, 6:58:32", "remaining_time": "18:01:37"} +{"current_steps": 17870, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4905789402923697e-05, "epoch": 1.26, "percentage": 63.25, "elapsed_time": "1 day, 6:59:33", "remaining_time": "18:00:33"} +{"current_steps": 17880, "total_steps": 28254, "loss": 0.7247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4880364547209466e-05, "epoch": 1.27, "percentage": 63.28, "elapsed_time": "1 day, 7:00:36", "remaining_time": "17:59:31"} +{"current_steps": 17890, "total_steps": 28254, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4854952202860033e-05, "epoch": 1.27, "percentage": 63.32, "elapsed_time": "1 day, 7:01:38", "remaining_time": "17:58:28"} +{"current_steps": 17900, "total_steps": 28254, "loss": 0.7011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4829552401293822e-05, "epoch": 1.27, "percentage": 63.35, "elapsed_time": "1 day, 7:02:40", "remaining_time": "17:57:26"} +{"current_steps": 17910, "total_steps": 28254, "loss": 0.7118, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4804165173913764e-05, "epoch": 1.27, "percentage": 63.39, "elapsed_time": "1 day, 7:03:42", "remaining_time": "17:56:23"} +{"current_steps": 17920, "total_steps": 28254, "loss": 0.6924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4778790552107236e-05, "epoch": 1.27, "percentage": 63.42, "elapsed_time": "1 day, 7:04:45", "remaining_time": "17:55:21"} +{"current_steps": 17930, "total_steps": 28254, "loss": 0.72, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4753428567246052e-05, "epoch": 1.27, "percentage": 63.46, "elapsed_time": "1 day, 7:05:47", "remaining_time": "17:54:19"} +{"current_steps": 17940, "total_steps": 28254, "loss": 0.7124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4728079250686366e-05, "epoch": 1.27, "percentage": 63.5, "elapsed_time": "1 day, 7:06:50", "remaining_time": "17:53:16"} +{"current_steps": 17950, "total_steps": 28254, "loss": 0.7015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.470274263376869e-05, "epoch": 1.27, "percentage": 63.53, "elapsed_time": "1 day, 7:07:50", "remaining_time": "17:52:12"} +{"current_steps": 17960, "total_steps": 28254, "loss": 0.7289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4677418747817847e-05, "epoch": 1.27, "percentage": 63.57, "elapsed_time": "1 day, 7:08:53", "remaining_time": "17:51:10"} +{"current_steps": 17970, "total_steps": 28254, "loss": 0.709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4652107624142908e-05, "epoch": 1.27, "percentage": 63.6, "elapsed_time": "1 day, 7:09:55", "remaining_time": "17:50:07"} +{"current_steps": 17980, "total_steps": 28254, "loss": 0.7018, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4626809294037147e-05, "epoch": 1.27, "percentage": 63.64, "elapsed_time": "1 day, 7:10:56", "remaining_time": "17:49:04"} +{"current_steps": 17990, "total_steps": 28254, "loss": 0.7282, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4601523788778043e-05, "epoch": 1.27, "percentage": 63.67, "elapsed_time": "1 day, 7:11:58", "remaining_time": "17:48:02"} +{"current_steps": 18000, "total_steps": 28254, "loss": 0.6876, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4576251139627222e-05, "epoch": 1.27, "percentage": 63.71, "elapsed_time": "1 day, 7:13:01", "remaining_time": "17:46:59"} +{"current_steps": 18010, "total_steps": 28254, "loss": 0.7062, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4550991377830426e-05, "epoch": 1.27, "percentage": 63.74, "elapsed_time": "1 day, 7:14:04", "remaining_time": "17:45:58"} +{"current_steps": 18020, "total_steps": 28254, "loss": 0.7015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4525744534617402e-05, "epoch": 1.28, "percentage": 63.78, "elapsed_time": "1 day, 7:15:07", "remaining_time": "17:44:55"} +{"current_steps": 18030, "total_steps": 28254, "loss": 0.7316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.450051064120199e-05, "epoch": 1.28, "percentage": 63.81, "elapsed_time": "1 day, 7:16:08", "remaining_time": "17:43:52"} +{"current_steps": 18040, "total_steps": 28254, "loss": 0.7131, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4475289728782e-05, "epoch": 1.28, "percentage": 63.85, "elapsed_time": "1 day, 7:17:11", "remaining_time": "17:42:50"} +{"current_steps": 18050, "total_steps": 28254, "loss": 0.7294, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4450081828539208e-05, "epoch": 1.28, "percentage": 63.88, "elapsed_time": "1 day, 7:18:11", "remaining_time": "17:41:46"} +{"current_steps": 18060, "total_steps": 28254, "loss": 0.7204, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.442488697163925e-05, "epoch": 1.28, "percentage": 63.92, "elapsed_time": "1 day, 7:19:14", "remaining_time": "17:40:44"} +{"current_steps": 18070, "total_steps": 28254, "loss": 0.7443, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4399705189231691e-05, "epoch": 1.28, "percentage": 63.96, "elapsed_time": "1 day, 7:20:16", "remaining_time": "17:39:41"} +{"current_steps": 18080, "total_steps": 28254, "loss": 0.6726, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.437453651244991e-05, "epoch": 1.28, "percentage": 63.99, "elapsed_time": "1 day, 7:21:17", "remaining_time": "17:38:38"} +{"current_steps": 18090, "total_steps": 28254, "loss": 0.7047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4349380972411092e-05, "epoch": 1.28, "percentage": 64.03, "elapsed_time": "1 day, 7:22:17", "remaining_time": "17:37:34"} +{"current_steps": 18100, "total_steps": 28254, "loss": 0.7131, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4324238600216167e-05, "epoch": 1.28, "percentage": 64.06, "elapsed_time": "1 day, 7:23:17", "remaining_time": "17:36:31"} +{"current_steps": 18110, "total_steps": 28254, "loss": 0.7373, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4299109426949784e-05, "epoch": 1.28, "percentage": 64.1, "elapsed_time": "1 day, 7:24:20", "remaining_time": "17:35:28"} +{"current_steps": 18120, "total_steps": 28254, "loss": 0.7337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4273993483680287e-05, "epoch": 1.28, "percentage": 64.13, "elapsed_time": "1 day, 7:25:22", "remaining_time": "17:34:26"} +{"current_steps": 18130, "total_steps": 28254, "loss": 0.7014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4248890801459664e-05, "epoch": 1.28, "percentage": 64.17, "elapsed_time": "1 day, 7:26:23", "remaining_time": "17:33:22"} +{"current_steps": 18140, "total_steps": 28254, "loss": 0.7327, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4223801411323497e-05, "epoch": 1.28, "percentage": 64.2, "elapsed_time": "1 day, 7:27:25", "remaining_time": "17:32:20"} +{"current_steps": 18150, "total_steps": 28254, "loss": 0.7178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4198725344290928e-05, "epoch": 1.28, "percentage": 64.24, "elapsed_time": "1 day, 7:28:28", "remaining_time": "17:31:17"} +{"current_steps": 18160, "total_steps": 28254, "loss": 0.7035, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4173662631364643e-05, "epoch": 1.29, "percentage": 64.27, "elapsed_time": "1 day, 7:29:29", "remaining_time": "17:30:15"} +{"current_steps": 18170, "total_steps": 28254, "loss": 0.7009, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4148613303530822e-05, "epoch": 1.29, "percentage": 64.31, "elapsed_time": "1 day, 7:30:31", "remaining_time": "17:29:12"} +{"current_steps": 18180, "total_steps": 28254, "loss": 0.6923, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4123577391759083e-05, "epoch": 1.29, "percentage": 64.34, "elapsed_time": "1 day, 7:31:36", "remaining_time": "17:28:11"} +{"current_steps": 18190, "total_steps": 28254, "loss": 0.6946, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4098554927002444e-05, "epoch": 1.29, "percentage": 64.38, "elapsed_time": "1 day, 7:32:39", "remaining_time": "17:27:09"} +{"current_steps": 18200, "total_steps": 28254, "loss": 0.7287, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4073545940197325e-05, "epoch": 1.29, "percentage": 64.42, "elapsed_time": "1 day, 7:33:42", "remaining_time": "17:26:07"} +{"current_steps": 18210, "total_steps": 28254, "loss": 0.6951, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4048550462263482e-05, "epoch": 1.29, "percentage": 64.45, "elapsed_time": "1 day, 7:34:45", "remaining_time": "17:25:04"} +{"current_steps": 18220, "total_steps": 28254, "loss": 0.7234, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4023568524103953e-05, "epoch": 1.29, "percentage": 64.49, "elapsed_time": "1 day, 7:35:47", "remaining_time": "17:24:02"} +{"current_steps": 18230, "total_steps": 28254, "loss": 0.6795, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.399860015660503e-05, "epoch": 1.29, "percentage": 64.52, "elapsed_time": "1 day, 7:36:49", "remaining_time": "17:22:59"} +{"current_steps": 18240, "total_steps": 28254, "loss": 0.7257, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3973645390636248e-05, "epoch": 1.29, "percentage": 64.56, "elapsed_time": "1 day, 7:37:53", "remaining_time": "17:21:58"} +{"current_steps": 18250, "total_steps": 28254, "loss": 0.7613, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3948704257050315e-05, "epoch": 1.29, "percentage": 64.59, "elapsed_time": "1 day, 7:38:55", "remaining_time": "17:20:55"} +{"current_steps": 18260, "total_steps": 28254, "loss": 0.6848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3923776786683118e-05, "epoch": 1.29, "percentage": 64.63, "elapsed_time": "1 day, 7:39:56", "remaining_time": "17:19:52"} +{"current_steps": 18270, "total_steps": 28254, "loss": 0.7101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3898863010353569e-05, "epoch": 1.29, "percentage": 64.66, "elapsed_time": "1 day, 7:40:59", "remaining_time": "17:18:49"} +{"current_steps": 18280, "total_steps": 28254, "loss": 0.7361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3873962958863723e-05, "epoch": 1.29, "percentage": 64.7, "elapsed_time": "1 day, 7:42:00", "remaining_time": "17:17:46"} +{"current_steps": 18290, "total_steps": 28254, "loss": 0.7305, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3849076662998648e-05, "epoch": 1.29, "percentage": 64.73, "elapsed_time": "1 day, 7:43:02", "remaining_time": "17:16:44"} +{"current_steps": 18300, "total_steps": 28254, "loss": 0.7449, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3824204153526407e-05, "epoch": 1.3, "percentage": 64.77, "elapsed_time": "1 day, 7:44:05", "remaining_time": "17:15:41"} +{"current_steps": 18310, "total_steps": 28254, "loss": 0.7034, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3799345461198006e-05, "epoch": 1.3, "percentage": 64.8, "elapsed_time": "1 day, 7:45:05", "remaining_time": "17:14:38"} +{"current_steps": 18320, "total_steps": 28254, "loss": 0.6939, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3774500616747366e-05, "epoch": 1.3, "percentage": 64.84, "elapsed_time": "1 day, 7:46:08", "remaining_time": "17:13:35"} +{"current_steps": 18330, "total_steps": 28254, "loss": 0.7017, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3749669650891306e-05, "epoch": 1.3, "percentage": 64.88, "elapsed_time": "1 day, 7:47:13", "remaining_time": "17:12:35"} +{"current_steps": 18340, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3724852594329482e-05, "epoch": 1.3, "percentage": 64.91, "elapsed_time": "1 day, 7:48:15", "remaining_time": "17:11:32"} +{"current_steps": 18350, "total_steps": 28254, "loss": 0.695, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3700049477744343e-05, "epoch": 1.3, "percentage": 64.95, "elapsed_time": "1 day, 7:49:17", "remaining_time": "17:10:29"} +{"current_steps": 18360, "total_steps": 28254, "loss": 0.7316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3675260331801093e-05, "epoch": 1.3, "percentage": 64.98, "elapsed_time": "1 day, 7:50:22", "remaining_time": "17:09:28"} +{"current_steps": 18370, "total_steps": 28254, "loss": 0.7337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3650485187147694e-05, "epoch": 1.3, "percentage": 65.02, "elapsed_time": "1 day, 7:51:25", "remaining_time": "17:08:26"} +{"current_steps": 18380, "total_steps": 28254, "loss": 0.7116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3625724074414792e-05, "epoch": 1.3, "percentage": 65.05, "elapsed_time": "1 day, 7:52:27", "remaining_time": "17:07:23"} +{"current_steps": 18390, "total_steps": 28254, "loss": 0.7163, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3600977024215658e-05, "epoch": 1.3, "percentage": 65.09, "elapsed_time": "1 day, 7:53:29", "remaining_time": "17:06:21"} +{"current_steps": 18400, "total_steps": 28254, "loss": 0.7016, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3576244067146193e-05, "epoch": 1.3, "percentage": 65.12, "elapsed_time": "1 day, 7:54:33", "remaining_time": "17:05:19"} +{"current_steps": 18410, "total_steps": 28254, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3551525233784879e-05, "epoch": 1.3, "percentage": 65.16, "elapsed_time": "1 day, 7:55:36", "remaining_time": "17:04:17"} +{"current_steps": 18420, "total_steps": 28254, "loss": 0.6948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3526820554692743e-05, "epoch": 1.3, "percentage": 65.19, "elapsed_time": "1 day, 7:56:40", "remaining_time": "17:03:15"} +{"current_steps": 18430, "total_steps": 28254, "loss": 0.7157, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3502130060413293e-05, "epoch": 1.3, "percentage": 65.23, "elapsed_time": "1 day, 7:57:43", "remaining_time": "17:02:13"} +{"current_steps": 18440, "total_steps": 28254, "loss": 0.7297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.34774537814725e-05, "epoch": 1.31, "percentage": 65.27, "elapsed_time": "1 day, 7:58:45", "remaining_time": "17:01:11"} +{"current_steps": 18450, "total_steps": 28254, "loss": 0.7092, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3452791748378767e-05, "epoch": 1.31, "percentage": 65.3, "elapsed_time": "1 day, 7:59:48", "remaining_time": "17:00:09"} +{"current_steps": 18460, "total_steps": 28254, "loss": 0.728, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3428143991622902e-05, "epoch": 1.31, "percentage": 65.34, "elapsed_time": "1 day, 8:00:49", "remaining_time": "16:59:06"} +{"current_steps": 18470, "total_steps": 28254, "loss": 0.7247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3403510541678055e-05, "epoch": 1.31, "percentage": 65.37, "elapsed_time": "1 day, 8:01:52", "remaining_time": "16:58:03"} +{"current_steps": 18480, "total_steps": 28254, "loss": 0.7027, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3381352694222871e-05, "epoch": 1.31, "percentage": 65.41, "elapsed_time": "1 day, 8:02:55", "remaining_time": "16:57:01"} +{"current_steps": 18490, "total_steps": 28254, "loss": 0.7078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3356746511109036e-05, "epoch": 1.31, "percentage": 65.44, "elapsed_time": "1 day, 8:03:58", "remaining_time": "16:55:59"} +{"current_steps": 18500, "total_steps": 28254, "loss": 0.7383, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3332154723078139e-05, "epoch": 1.31, "percentage": 65.48, "elapsed_time": "1 day, 8:05:01", "remaining_time": "16:54:57"} +{"current_steps": 18510, "total_steps": 28254, "loss": 0.7356, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3307577360534146e-05, "epoch": 1.31, "percentage": 65.51, "elapsed_time": "1 day, 8:06:03", "remaining_time": "16:53:54"} +{"current_steps": 18520, "total_steps": 28254, "loss": 0.6898, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3283014453863141e-05, "epoch": 1.31, "percentage": 65.55, "elapsed_time": "1 day, 8:07:05", "remaining_time": "16:52:51"} +{"current_steps": 18530, "total_steps": 28254, "loss": 0.7231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3258466033433384e-05, "epoch": 1.31, "percentage": 65.58, "elapsed_time": "1 day, 8:08:07", "remaining_time": "16:51:49"} +{"current_steps": 18540, "total_steps": 28254, "loss": 0.6927, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.323393212959518e-05, "epoch": 1.31, "percentage": 65.62, "elapsed_time": "1 day, 8:09:09", "remaining_time": "16:50:46"} +{"current_steps": 18550, "total_steps": 28254, "loss": 0.7004, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.320941277268093e-05, "epoch": 1.31, "percentage": 65.65, "elapsed_time": "1 day, 8:10:10", "remaining_time": "16:49:43"} +{"current_steps": 18560, "total_steps": 28254, "loss": 0.6777, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3184907993005007e-05, "epoch": 1.31, "percentage": 65.69, "elapsed_time": "1 day, 8:11:12", "remaining_time": "16:48:40"} +{"current_steps": 18570, "total_steps": 28254, "loss": 0.6808, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3160417820863807e-05, "epoch": 1.31, "percentage": 65.73, "elapsed_time": "1 day, 8:12:15", "remaining_time": "16:47:38"} +{"current_steps": 18580, "total_steps": 28254, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3135942286535619e-05, "epoch": 1.32, "percentage": 65.76, "elapsed_time": "1 day, 8:13:18", "remaining_time": "16:46:36"} +{"current_steps": 18590, "total_steps": 28254, "loss": 0.7246, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3111481420280675e-05, "epoch": 1.32, "percentage": 65.8, "elapsed_time": "1 day, 8:14:19", "remaining_time": "16:45:33"} +{"current_steps": 18600, "total_steps": 28254, "loss": 0.6971, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3087035252341035e-05, "epoch": 1.32, "percentage": 65.83, "elapsed_time": "1 day, 8:15:20", "remaining_time": "16:44:30"} +{"current_steps": 18610, "total_steps": 28254, "loss": 0.7056, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3062603812940616e-05, "epoch": 1.32, "percentage": 65.87, "elapsed_time": "1 day, 8:16:23", "remaining_time": "16:43:28"} +{"current_steps": 18620, "total_steps": 28254, "loss": 0.7253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.303818713228513e-05, "epoch": 1.32, "percentage": 65.9, "elapsed_time": "1 day, 8:17:24", "remaining_time": "16:42:25"} +{"current_steps": 18630, "total_steps": 28254, "loss": 0.6891, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3013785240562015e-05, "epoch": 1.32, "percentage": 65.94, "elapsed_time": "1 day, 8:18:24", "remaining_time": "16:41:21"} +{"current_steps": 18640, "total_steps": 28254, "loss": 0.7273, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.298939816794043e-05, "epoch": 1.32, "percentage": 65.97, "elapsed_time": "1 day, 8:19:27", "remaining_time": "16:40:19"} +{"current_steps": 18650, "total_steps": 28254, "loss": 0.7345, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2965025944571228e-05, "epoch": 1.32, "percentage": 66.01, "elapsed_time": "1 day, 8:20:29", "remaining_time": "16:39:16"} +{"current_steps": 18660, "total_steps": 28254, "loss": 0.7106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2940668600586902e-05, "epoch": 1.32, "percentage": 66.04, "elapsed_time": "1 day, 8:21:33", "remaining_time": "16:38:15"} +{"current_steps": 18670, "total_steps": 28254, "loss": 0.6933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.291632616610154e-05, "epoch": 1.32, "percentage": 66.08, "elapsed_time": "1 day, 8:22:35", "remaining_time": "16:37:12"} +{"current_steps": 18680, "total_steps": 28254, "loss": 0.6973, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2891998671210787e-05, "epoch": 1.32, "percentage": 66.11, "elapsed_time": "1 day, 8:23:40", "remaining_time": "16:36:10"} +{"current_steps": 18690, "total_steps": 28254, "loss": 0.7173, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2867686145991831e-05, "epoch": 1.32, "percentage": 66.15, "elapsed_time": "1 day, 8:24:39", "remaining_time": "16:35:07"} +{"current_steps": 18700, "total_steps": 28254, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2843388620503371e-05, "epoch": 1.32, "percentage": 66.19, "elapsed_time": "1 day, 8:25:42", "remaining_time": "16:34:04"} +{"current_steps": 18710, "total_steps": 28254, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2819106124785518e-05, "epoch": 1.32, "percentage": 66.22, "elapsed_time": "1 day, 8:26:45", "remaining_time": "16:33:02"} +{"current_steps": 18720, "total_steps": 28254, "loss": 0.7301, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2794838688859845e-05, "epoch": 1.33, "percentage": 66.26, "elapsed_time": "1 day, 8:27:46", "remaining_time": "16:31:59"} +{"current_steps": 18730, "total_steps": 28254, "loss": 0.7166, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.277058634272926e-05, "epoch": 1.33, "percentage": 66.29, "elapsed_time": "1 day, 8:28:48", "remaining_time": "16:30:57"} +{"current_steps": 18740, "total_steps": 28254, "loss": 0.7011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2746349116378064e-05, "epoch": 1.33, "percentage": 66.33, "elapsed_time": "1 day, 8:29:53", "remaining_time": "16:29:55"} +{"current_steps": 18750, "total_steps": 28254, "loss": 0.7219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2722127039771819e-05, "epoch": 1.33, "percentage": 66.36, "elapsed_time": "1 day, 8:30:57", "remaining_time": "16:28:54"} +{"current_steps": 18760, "total_steps": 28254, "loss": 0.7132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.26979201428574e-05, "epoch": 1.33, "percentage": 66.4, "elapsed_time": "1 day, 8:31:58", "remaining_time": "16:27:51"} +{"current_steps": 18770, "total_steps": 28254, "loss": 0.746, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.267372845556287e-05, "epoch": 1.33, "percentage": 66.43, "elapsed_time": "1 day, 8:33:01", "remaining_time": "16:26:48"} +{"current_steps": 18780, "total_steps": 28254, "loss": 0.7277, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2649552007797533e-05, "epoch": 1.33, "percentage": 66.47, "elapsed_time": "1 day, 8:34:03", "remaining_time": "16:25:46"} +{"current_steps": 18790, "total_steps": 28254, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2625390829451805e-05, "epoch": 1.33, "percentage": 66.5, "elapsed_time": "1 day, 8:35:07", "remaining_time": "16:24:44"} +{"current_steps": 18800, "total_steps": 28254, "loss": 0.7349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2601244950397273e-05, "epoch": 1.33, "percentage": 66.54, "elapsed_time": "1 day, 8:36:10", "remaining_time": "16:23:42"} +{"current_steps": 18810, "total_steps": 28254, "loss": 0.7073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2577114400486561e-05, "epoch": 1.33, "percentage": 66.57, "elapsed_time": "1 day, 8:37:11", "remaining_time": "16:22:39"} +{"current_steps": 18820, "total_steps": 28254, "loss": 0.7071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2552999209553385e-05, "epoch": 1.33, "percentage": 66.61, "elapsed_time": "1 day, 8:38:12", "remaining_time": "16:21:36"} +{"current_steps": 18830, "total_steps": 28254, "loss": 0.7241, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2528899407412426e-05, "epoch": 1.33, "percentage": 66.65, "elapsed_time": "1 day, 8:39:13", "remaining_time": "16:20:33"} +{"current_steps": 18840, "total_steps": 28254, "loss": 0.7267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2504815023859387e-05, "epoch": 1.33, "percentage": 66.68, "elapsed_time": "1 day, 8:40:14", "remaining_time": "16:19:29"} +{"current_steps": 18850, "total_steps": 28254, "loss": 0.6909, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2480746088670866e-05, "epoch": 1.33, "percentage": 66.72, "elapsed_time": "1 day, 8:41:16", "remaining_time": "16:18:26"} +{"current_steps": 18860, "total_steps": 28254, "loss": 0.7326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2456692631604392e-05, "epoch": 1.33, "percentage": 66.75, "elapsed_time": "1 day, 8:42:20", "remaining_time": "16:17:25"} +{"current_steps": 18870, "total_steps": 28254, "loss": 0.7191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2432654682398348e-05, "epoch": 1.34, "percentage": 66.79, "elapsed_time": "1 day, 8:43:20", "remaining_time": "16:16:22"} +{"current_steps": 18880, "total_steps": 28254, "loss": 0.6932, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2408632270771941e-05, "epoch": 1.34, "percentage": 66.82, "elapsed_time": "1 day, 8:44:21", "remaining_time": "16:15:18"} +{"current_steps": 18890, "total_steps": 28254, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2384625426425156e-05, "epoch": 1.34, "percentage": 66.86, "elapsed_time": "1 day, 8:45:23", "remaining_time": "16:14:16"} +{"current_steps": 18900, "total_steps": 28254, "loss": 0.7001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2360634179038751e-05, "epoch": 1.34, "percentage": 66.89, "elapsed_time": "1 day, 8:46:27", "remaining_time": "16:13:14"} +{"current_steps": 18910, "total_steps": 28254, "loss": 0.6793, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2336658558274211e-05, "epoch": 1.34, "percentage": 66.93, "elapsed_time": "1 day, 8:47:28", "remaining_time": "16:12:11"} +{"current_steps": 18920, "total_steps": 28254, "loss": 0.7359, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.231269859377367e-05, "epoch": 1.34, "percentage": 66.96, "elapsed_time": "1 day, 8:48:32", "remaining_time": "16:11:09"} +{"current_steps": 18930, "total_steps": 28254, "loss": 0.707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2288754315159912e-05, "epoch": 1.34, "percentage": 67.0, "elapsed_time": "1 day, 8:49:34", "remaining_time": "16:10:07"} +{"current_steps": 18940, "total_steps": 28254, "loss": 0.7213, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2264825752036344e-05, "epoch": 1.34, "percentage": 67.03, "elapsed_time": "1 day, 8:50:35", "remaining_time": "16:09:04"} +{"current_steps": 18950, "total_steps": 28254, "loss": 0.7316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2240912933986945e-05, "epoch": 1.34, "percentage": 67.07, "elapsed_time": "1 day, 8:51:38", "remaining_time": "16:08:01"} +{"current_steps": 18960, "total_steps": 28254, "loss": 0.6816, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2217015890576212e-05, "epoch": 1.34, "percentage": 67.11, "elapsed_time": "1 day, 8:52:39", "remaining_time": "16:06:58"} +{"current_steps": 18970, "total_steps": 28254, "loss": 0.7331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.219313465134913e-05, "epoch": 1.34, "percentage": 67.14, "elapsed_time": "1 day, 8:53:43", "remaining_time": "16:05:56"} +{"current_steps": 18980, "total_steps": 28254, "loss": 0.737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2169269245831171e-05, "epoch": 1.34, "percentage": 67.18, "elapsed_time": "1 day, 8:54:46", "remaining_time": "16:04:54"} +{"current_steps": 18990, "total_steps": 28254, "loss": 0.706, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.214541970352823e-05, "epoch": 1.34, "percentage": 67.21, "elapsed_time": "1 day, 8:55:49", "remaining_time": "16:03:52"} +{"current_steps": 19000, "total_steps": 28254, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2121586053926559e-05, "epoch": 1.34, "percentage": 67.25, "elapsed_time": "1 day, 8:56:53", "remaining_time": "16:02:50"} +{"current_steps": 19010, "total_steps": 28254, "loss": 0.7216, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.20977683264928e-05, "epoch": 1.35, "percentage": 67.28, "elapsed_time": "1 day, 8:57:57", "remaining_time": "16:01:49"} +{"current_steps": 19020, "total_steps": 28254, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2073966550673871e-05, "epoch": 1.35, "percentage": 67.32, "elapsed_time": "1 day, 8:59:00", "remaining_time": "16:00:47"} +{"current_steps": 19030, "total_steps": 28254, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2050180755897012e-05, "epoch": 1.35, "percentage": 67.35, "elapsed_time": "1 day, 9:00:04", "remaining_time": "15:59:45"} +{"current_steps": 19040, "total_steps": 28254, "loss": 0.689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2026410971569655e-05, "epoch": 1.35, "percentage": 67.39, "elapsed_time": "1 day, 9:01:05", "remaining_time": "15:58:42"} +{"current_steps": 19050, "total_steps": 28254, "loss": 0.7145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2002657227079486e-05, "epoch": 1.35, "percentage": 67.42, "elapsed_time": "1 day, 9:02:10", "remaining_time": "15:57:41"} +{"current_steps": 19060, "total_steps": 28254, "loss": 0.7008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1978919551794318e-05, "epoch": 1.35, "percentage": 67.46, "elapsed_time": "1 day, 9:03:12", "remaining_time": "15:56:38"} +{"current_steps": 19070, "total_steps": 28254, "loss": 0.7272, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.195519797506213e-05, "epoch": 1.35, "percentage": 67.49, "elapsed_time": "1 day, 9:04:13", "remaining_time": "15:55:35"} +{"current_steps": 19080, "total_steps": 28254, "loss": 0.7297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1931492526210988e-05, "epoch": 1.35, "percentage": 67.53, "elapsed_time": "1 day, 9:05:17", "remaining_time": "15:54:33"} +{"current_steps": 19090, "total_steps": 28254, "loss": 0.6938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1907803234549011e-05, "epoch": 1.35, "percentage": 67.57, "elapsed_time": "1 day, 9:06:19", "remaining_time": "15:53:31"} +{"current_steps": 19100, "total_steps": 28254, "loss": 0.7154, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1884130129364332e-05, "epoch": 1.35, "percentage": 67.6, "elapsed_time": "1 day, 9:07:21", "remaining_time": "15:52:28"} +{"current_steps": 19110, "total_steps": 28254, "loss": 0.7069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1860473239925097e-05, "epoch": 1.35, "percentage": 67.64, "elapsed_time": "1 day, 9:08:23", "remaining_time": "15:51:25"} +{"current_steps": 19120, "total_steps": 28254, "loss": 0.685, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1836832595479403e-05, "epoch": 1.35, "percentage": 67.67, "elapsed_time": "1 day, 9:09:25", "remaining_time": "15:50:23"} +{"current_steps": 19130, "total_steps": 28254, "loss": 0.7255, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.181320822525524e-05, "epoch": 1.35, "percentage": 67.71, "elapsed_time": "1 day, 9:10:27", "remaining_time": "15:49:20"} +{"current_steps": 19140, "total_steps": 28254, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.178960015846048e-05, "epoch": 1.35, "percentage": 67.74, "elapsed_time": "1 day, 9:11:30", "remaining_time": "15:48:18"} +{"current_steps": 19150, "total_steps": 28254, "loss": 0.7231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1766008424282863e-05, "epoch": 1.36, "percentage": 67.78, "elapsed_time": "1 day, 9:12:32", "remaining_time": "15:47:15"} +{"current_steps": 19160, "total_steps": 28254, "loss": 0.7174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1742433051889926e-05, "epoch": 1.36, "percentage": 67.81, "elapsed_time": "1 day, 9:13:33", "remaining_time": "15:46:12"} +{"current_steps": 19170, "total_steps": 28254, "loss": 0.7056, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1718874070428961e-05, "epoch": 1.36, "percentage": 67.85, "elapsed_time": "1 day, 9:14:35", "remaining_time": "15:45:10"} +{"current_steps": 19180, "total_steps": 28254, "loss": 0.7058, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1695331509027002e-05, "epoch": 1.36, "percentage": 67.88, "elapsed_time": "1 day, 9:15:36", "remaining_time": "15:44:07"} +{"current_steps": 19190, "total_steps": 28254, "loss": 0.7217, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1671805396790791e-05, "epoch": 1.36, "percentage": 67.92, "elapsed_time": "1 day, 9:16:38", "remaining_time": "15:43:04"} +{"current_steps": 19200, "total_steps": 28254, "loss": 0.6955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1648295762806743e-05, "epoch": 1.36, "percentage": 67.95, "elapsed_time": "1 day, 9:17:43", "remaining_time": "15:42:03"} +{"current_steps": 19210, "total_steps": 28254, "loss": 0.7148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1624802636140874e-05, "epoch": 1.36, "percentage": 67.99, "elapsed_time": "1 day, 9:18:46", "remaining_time": "15:41:01"} +{"current_steps": 19220, "total_steps": 28254, "loss": 0.7097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1601326045838792e-05, "epoch": 1.36, "percentage": 68.03, "elapsed_time": "1 day, 9:19:47", "remaining_time": "15:39:57"} +{"current_steps": 19230, "total_steps": 28254, "loss": 0.7287, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1577866020925685e-05, "epoch": 1.36, "percentage": 68.06, "elapsed_time": "1 day, 9:20:49", "remaining_time": "15:38:55"} +{"current_steps": 19240, "total_steps": 28254, "loss": 0.7097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1554422590406255e-05, "epoch": 1.36, "percentage": 68.1, "elapsed_time": "1 day, 9:21:53", "remaining_time": "15:37:53"} +{"current_steps": 19250, "total_steps": 28254, "loss": 0.693, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1530995783264666e-05, "epoch": 1.36, "percentage": 68.13, "elapsed_time": "1 day, 9:22:55", "remaining_time": "15:36:50"} +{"current_steps": 19260, "total_steps": 28254, "loss": 0.7145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1507585628464542e-05, "epoch": 1.36, "percentage": 68.17, "elapsed_time": "1 day, 9:23:57", "remaining_time": "15:35:48"} +{"current_steps": 19270, "total_steps": 28254, "loss": 0.7282, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1484192154948925e-05, "epoch": 1.36, "percentage": 68.2, "elapsed_time": "1 day, 9:24:59", "remaining_time": "15:34:45"} +{"current_steps": 19280, "total_steps": 28254, "loss": 0.7072, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1460815391640237e-05, "epoch": 1.36, "percentage": 68.24, "elapsed_time": "1 day, 9:26:02", "remaining_time": "15:33:43"} +{"current_steps": 19290, "total_steps": 28254, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1437455367440211e-05, "epoch": 1.37, "percentage": 68.27, "elapsed_time": "1 day, 9:27:06", "remaining_time": "15:32:41"} +{"current_steps": 19300, "total_steps": 28254, "loss": 0.7145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1414112111229933e-05, "epoch": 1.37, "percentage": 68.31, "elapsed_time": "1 day, 9:28:08", "remaining_time": "15:31:39"} +{"current_steps": 19310, "total_steps": 28254, "loss": 0.692, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1390785651869704e-05, "epoch": 1.37, "percentage": 68.34, "elapsed_time": "1 day, 9:29:11", "remaining_time": "15:30:37"} +{"current_steps": 19320, "total_steps": 28254, "loss": 0.7257, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1367476018199094e-05, "epoch": 1.37, "percentage": 68.38, "elapsed_time": "1 day, 9:30:15", "remaining_time": "15:29:35"} +{"current_steps": 19330, "total_steps": 28254, "loss": 0.7178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1344183239036876e-05, "epoch": 1.37, "percentage": 68.42, "elapsed_time": "1 day, 9:31:18", "remaining_time": "15:28:33"} +{"current_steps": 19340, "total_steps": 28254, "loss": 0.6941, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1320907343180958e-05, "epoch": 1.37, "percentage": 68.45, "elapsed_time": "1 day, 9:32:20", "remaining_time": "15:27:30"} +{"current_steps": 19350, "total_steps": 28254, "loss": 0.7482, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.129764835940838e-05, "epoch": 1.37, "percentage": 68.49, "elapsed_time": "1 day, 9:33:22", "remaining_time": "15:26:27"} +{"current_steps": 19360, "total_steps": 28254, "loss": 0.7291, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1274406316475287e-05, "epoch": 1.37, "percentage": 68.52, "elapsed_time": "1 day, 9:34:26", "remaining_time": "15:25:26"} +{"current_steps": 19370, "total_steps": 28254, "loss": 0.7153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1251181243116878e-05, "epoch": 1.37, "percentage": 68.56, "elapsed_time": "1 day, 9:35:29", "remaining_time": "15:24:24"} +{"current_steps": 19380, "total_steps": 28254, "loss": 0.7166, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1227973168047362e-05, "epoch": 1.37, "percentage": 68.59, "elapsed_time": "1 day, 9:36:32", "remaining_time": "15:23:21"} +{"current_steps": 19390, "total_steps": 28254, "loss": 0.7189, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1204782119959925e-05, "epoch": 1.37, "percentage": 68.63, "elapsed_time": "1 day, 9:37:36", "remaining_time": "15:22:20"} +{"current_steps": 19400, "total_steps": 28254, "loss": 0.7164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.118160812752672e-05, "epoch": 1.37, "percentage": 68.66, "elapsed_time": "1 day, 9:38:38", "remaining_time": "15:21:17"} +{"current_steps": 19410, "total_steps": 28254, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1158451219398819e-05, "epoch": 1.37, "percentage": 68.7, "elapsed_time": "1 day, 9:39:41", "remaining_time": "15:20:15"} +{"current_steps": 19420, "total_steps": 28254, "loss": 0.7305, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1135311424206147e-05, "epoch": 1.37, "percentage": 68.73, "elapsed_time": "1 day, 9:40:44", "remaining_time": "15:19:12"} +{"current_steps": 19430, "total_steps": 28254, "loss": 0.7395, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1112188770557474e-05, "epoch": 1.38, "percentage": 68.77, "elapsed_time": "1 day, 9:41:47", "remaining_time": "15:18:11"} +{"current_steps": 19440, "total_steps": 28254, "loss": 0.6953, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1089083287040398e-05, "epoch": 1.38, "percentage": 68.8, "elapsed_time": "1 day, 9:42:50", "remaining_time": "15:17:08"} +{"current_steps": 19450, "total_steps": 28254, "loss": 0.6945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1065995002221283e-05, "epoch": 1.38, "percentage": 68.84, "elapsed_time": "1 day, 9:43:51", "remaining_time": "15:16:05"} +{"current_steps": 19460, "total_steps": 28254, "loss": 0.6879, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1042923944645217e-05, "epoch": 1.38, "percentage": 68.88, "elapsed_time": "1 day, 9:44:54", "remaining_time": "15:15:03"} +{"current_steps": 19470, "total_steps": 28254, "loss": 0.7195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.101987014283599e-05, "epoch": 1.38, "percentage": 68.91, "elapsed_time": "1 day, 9:45:57", "remaining_time": "15:14:01"} +{"current_steps": 19480, "total_steps": 28254, "loss": 0.7221, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0996833625296066e-05, "epoch": 1.38, "percentage": 68.95, "elapsed_time": "1 day, 9:47:00", "remaining_time": "15:12:59"} +{"current_steps": 19490, "total_steps": 28254, "loss": 0.67, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.097381442050655e-05, "epoch": 1.38, "percentage": 68.98, "elapsed_time": "1 day, 9:48:04", "remaining_time": "15:11:57"} +{"current_steps": 19500, "total_steps": 28254, "loss": 0.7281, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0950812556927125e-05, "epoch": 1.38, "percentage": 69.02, "elapsed_time": "1 day, 9:49:07", "remaining_time": "15:10:55"} +{"current_steps": 19510, "total_steps": 28254, "loss": 0.7209, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0927828062996026e-05, "epoch": 1.38, "percentage": 69.05, "elapsed_time": "1 day, 9:50:08", "remaining_time": "15:09:52"} +{"current_steps": 19520, "total_steps": 28254, "loss": 0.7153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0904860967130034e-05, "epoch": 1.38, "percentage": 69.09, "elapsed_time": "1 day, 9:51:10", "remaining_time": "15:08:49"} +{"current_steps": 19530, "total_steps": 28254, "loss": 0.7008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0881911297724415e-05, "epoch": 1.38, "percentage": 69.12, "elapsed_time": "1 day, 9:52:14", "remaining_time": "15:07:48"} +{"current_steps": 19540, "total_steps": 28254, "loss": 0.6992, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0858979083152906e-05, "epoch": 1.38, "percentage": 69.16, "elapsed_time": "1 day, 9:53:18", "remaining_time": "15:06:45"} +{"current_steps": 19550, "total_steps": 28254, "loss": 0.6969, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0836064351767609e-05, "epoch": 1.38, "percentage": 69.19, "elapsed_time": "1 day, 9:54:18", "remaining_time": "15:05:42"} +{"current_steps": 19560, "total_steps": 28254, "loss": 0.7363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0813167131899062e-05, "epoch": 1.38, "percentage": 69.23, "elapsed_time": "1 day, 9:55:19", "remaining_time": "15:04:39"} +{"current_steps": 19570, "total_steps": 28254, "loss": 0.7194, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.079028745185614e-05, "epoch": 1.39, "percentage": 69.26, "elapsed_time": "1 day, 9:56:22", "remaining_time": "15:03:37"} +{"current_steps": 19580, "total_steps": 28254, "loss": 0.6893, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0767425339926038e-05, "epoch": 1.39, "percentage": 69.3, "elapsed_time": "1 day, 9:57:25", "remaining_time": "15:02:35"} +{"current_steps": 19590, "total_steps": 28254, "loss": 0.7197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0744580824374217e-05, "epoch": 1.39, "percentage": 69.34, "elapsed_time": "1 day, 9:58:30", "remaining_time": "15:01:33"} +{"current_steps": 19600, "total_steps": 28254, "loss": 0.7105, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0721753933444376e-05, "epoch": 1.39, "percentage": 69.37, "elapsed_time": "1 day, 9:59:32", "remaining_time": "15:00:31"} +{"current_steps": 19610, "total_steps": 28254, "loss": 0.6949, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0698944695358448e-05, "epoch": 1.39, "percentage": 69.41, "elapsed_time": "1 day, 10:00:35", "remaining_time": "14:59:29"} +{"current_steps": 19620, "total_steps": 28254, "loss": 0.7077, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0676153138316536e-05, "epoch": 1.39, "percentage": 69.44, "elapsed_time": "1 day, 10:01:38", "remaining_time": "14:58:26"} +{"current_steps": 19630, "total_steps": 28254, "loss": 0.7389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0653379290496872e-05, "epoch": 1.39, "percentage": 69.48, "elapsed_time": "1 day, 10:02:42", "remaining_time": "14:57:24"} +{"current_steps": 19640, "total_steps": 28254, "loss": 0.7202, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0630623180055788e-05, "epoch": 1.39, "percentage": 69.51, "elapsed_time": "1 day, 10:03:44", "remaining_time": "14:56:22"} +{"current_steps": 19650, "total_steps": 28254, "loss": 0.6841, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0607884835127701e-05, "epoch": 1.39, "percentage": 69.55, "elapsed_time": "1 day, 10:04:47", "remaining_time": "14:55:20"} +{"current_steps": 19660, "total_steps": 28254, "loss": 0.6841, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0585164283825075e-05, "epoch": 1.39, "percentage": 69.58, "elapsed_time": "1 day, 10:05:49", "remaining_time": "14:54:17"} +{"current_steps": 19670, "total_steps": 28254, "loss": 0.7387, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0562461554238346e-05, "epoch": 1.39, "percentage": 69.62, "elapsed_time": "1 day, 10:06:51", "remaining_time": "14:53:14"} +{"current_steps": 19680, "total_steps": 28254, "loss": 0.7086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.053977667443592e-05, "epoch": 1.39, "percentage": 69.65, "elapsed_time": "1 day, 10:07:54", "remaining_time": "14:52:12"} +{"current_steps": 19690, "total_steps": 28254, "loss": 0.6954, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0517109672464154e-05, "epoch": 1.39, "percentage": 69.69, "elapsed_time": "1 day, 10:08:57", "remaining_time": "14:51:10"} +{"current_steps": 19700, "total_steps": 28254, "loss": 0.7152, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0494460576347304e-05, "epoch": 1.39, "percentage": 69.72, "elapsed_time": "1 day, 10:09:58", "remaining_time": "14:50:07"} +{"current_steps": 19710, "total_steps": 28254, "loss": 0.6811, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0471829414087462e-05, "epoch": 1.4, "percentage": 69.76, "elapsed_time": "1 day, 10:11:00", "remaining_time": "14:49:04"} +{"current_steps": 19720, "total_steps": 28254, "loss": 0.6983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0449216213664553e-05, "epoch": 1.4, "percentage": 69.8, "elapsed_time": "1 day, 10:12:01", "remaining_time": "14:48:01"} +{"current_steps": 19730, "total_steps": 28254, "loss": 0.7382, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0426621003036315e-05, "epoch": 1.4, "percentage": 69.83, "elapsed_time": "1 day, 10:13:02", "remaining_time": "14:46:58"} +{"current_steps": 19740, "total_steps": 28254, "loss": 0.7026, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0404043810138242e-05, "epoch": 1.4, "percentage": 69.87, "elapsed_time": "1 day, 10:14:04", "remaining_time": "14:45:56"} +{"current_steps": 19750, "total_steps": 28254, "loss": 0.7031, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0381484662883531e-05, "epoch": 1.4, "percentage": 69.9, "elapsed_time": "1 day, 10:15:06", "remaining_time": "14:44:53"} +{"current_steps": 19760, "total_steps": 28254, "loss": 0.6844, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0358943589163073e-05, "epoch": 1.4, "percentage": 69.94, "elapsed_time": "1 day, 10:16:07", "remaining_time": "14:43:50"} +{"current_steps": 19770, "total_steps": 28254, "loss": 0.706, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0336420616845426e-05, "epoch": 1.4, "percentage": 69.97, "elapsed_time": "1 day, 10:17:09", "remaining_time": "14:42:47"} +{"current_steps": 19780, "total_steps": 28254, "loss": 0.7197, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0313915773776772e-05, "epoch": 1.4, "percentage": 70.01, "elapsed_time": "1 day, 10:18:12", "remaining_time": "14:41:45"} +{"current_steps": 19790, "total_steps": 28254, "loss": 0.6994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.029142908778088e-05, "epoch": 1.4, "percentage": 70.04, "elapsed_time": "1 day, 10:19:15", "remaining_time": "14:40:43"} +{"current_steps": 19800, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0268960586659027e-05, "epoch": 1.4, "percentage": 70.08, "elapsed_time": "1 day, 10:20:17", "remaining_time": "14:39:40"} +{"current_steps": 19810, "total_steps": 28254, "loss": 0.719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0246510298190063e-05, "epoch": 1.4, "percentage": 70.11, "elapsed_time": "1 day, 10:21:20", "remaining_time": "14:38:38"} +{"current_steps": 19820, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0224078250130292e-05, "epoch": 1.4, "percentage": 70.15, "elapsed_time": "1 day, 10:22:21", "remaining_time": "14:37:35"} +{"current_steps": 19830, "total_steps": 28254, "loss": 0.7238, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.020166447021349e-05, "epoch": 1.4, "percentage": 70.18, "elapsed_time": "1 day, 10:23:24", "remaining_time": "14:36:33"} +{"current_steps": 19840, "total_steps": 28254, "loss": 0.7045, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0179268986150816e-05, "epoch": 1.4, "percentage": 70.22, "elapsed_time": "1 day, 10:24:25", "remaining_time": "14:35:30"} +{"current_steps": 19850, "total_steps": 28254, "loss": 0.6938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0156891825630818e-05, "epoch": 1.41, "percentage": 70.26, "elapsed_time": "1 day, 10:25:27", "remaining_time": "14:34:27"} +{"current_steps": 19860, "total_steps": 28254, "loss": 0.6845, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0134533016319402e-05, "epoch": 1.41, "percentage": 70.29, "elapsed_time": "1 day, 10:26:27", "remaining_time": "14:33:24"} +{"current_steps": 19870, "total_steps": 28254, "loss": 0.7167, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0112192585859792e-05, "epoch": 1.41, "percentage": 70.33, "elapsed_time": "1 day, 10:27:29", "remaining_time": "14:32:21"} +{"current_steps": 19880, "total_steps": 28254, "loss": 0.7119, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0089870561872464e-05, "epoch": 1.41, "percentage": 70.36, "elapsed_time": "1 day, 10:28:31", "remaining_time": "14:31:19"} +{"current_steps": 19890, "total_steps": 28254, "loss": 0.7115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0067566971955142e-05, "epoch": 1.41, "percentage": 70.4, "elapsed_time": "1 day, 10:29:33", "remaining_time": "14:30:16"} +{"current_steps": 19900, "total_steps": 28254, "loss": 0.7203, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0045281843682778e-05, "epoch": 1.41, "percentage": 70.43, "elapsed_time": "1 day, 10:30:35", "remaining_time": "14:29:13"} +{"current_steps": 19910, "total_steps": 28254, "loss": 0.7004, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0023015204607491e-05, "epoch": 1.41, "percentage": 70.47, "elapsed_time": "1 day, 10:31:38", "remaining_time": "14:28:11"} +{"current_steps": 19920, "total_steps": 28254, "loss": 0.7156, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0000767082258536e-05, "epoch": 1.41, "percentage": 70.5, "elapsed_time": "1 day, 10:32:40", "remaining_time": "14:27:09"} +{"current_steps": 19930, "total_steps": 28254, "loss": 0.6905, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.978537504142266e-06, "epoch": 1.41, "percentage": 70.54, "elapsed_time": "1 day, 10:33:43", "remaining_time": "14:26:06"} +{"current_steps": 19940, "total_steps": 28254, "loss": 0.6819, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.956326497742121e-06, "epoch": 1.41, "percentage": 70.57, "elapsed_time": "1 day, 10:34:46", "remaining_time": "14:25:04"} +{"current_steps": 19950, "total_steps": 28254, "loss": 0.6979, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.934134090518593e-06, "epoch": 1.41, "percentage": 70.61, "elapsed_time": "1 day, 10:35:49", "remaining_time": "14:24:02"} +{"current_steps": 19960, "total_steps": 28254, "loss": 0.6983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.911960309909152e-06, "epoch": 1.41, "percentage": 70.64, "elapsed_time": "1 day, 10:36:51", "remaining_time": "14:22:59"} +{"current_steps": 19970, "total_steps": 28254, "loss": 0.7176, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.889805183328238e-06, "epoch": 1.41, "percentage": 70.68, "elapsed_time": "1 day, 10:37:53", "remaining_time": "14:21:57"} +{"current_steps": 19980, "total_steps": 28254, "loss": 0.6989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.86766873816725e-06, "epoch": 1.41, "percentage": 70.72, "elapsed_time": "1 day, 10:38:56", "remaining_time": "14:20:55"} +{"current_steps": 19990, "total_steps": 28254, "loss": 0.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.84555100179449e-06, "epoch": 1.41, "percentage": 70.75, "elapsed_time": "1 day, 10:39:59", "remaining_time": "14:19:52"} +{"current_steps": 20000, "total_steps": 28254, "loss": 0.7361, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.823452001555109e-06, "epoch": 1.42, "percentage": 70.79, "elapsed_time": "1 day, 10:41:00", "remaining_time": "14:18:50"} +{"current_steps": 20010, "total_steps": 28254, "loss": 0.7238, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.8013717647711e-06, "epoch": 1.42, "percentage": 70.82, "elapsed_time": "1 day, 10:42:02", "remaining_time": "14:17:47"} +{"current_steps": 20020, "total_steps": 28254, "loss": 0.7321, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.779310318741267e-06, "epoch": 1.42, "percentage": 70.86, "elapsed_time": "1 day, 10:43:06", "remaining_time": "14:16:45"} +{"current_steps": 20030, "total_steps": 28254, "loss": 0.7064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.75726769074118e-06, "epoch": 1.42, "percentage": 70.89, "elapsed_time": "1 day, 10:44:09", "remaining_time": "14:15:43"} +{"current_steps": 20040, "total_steps": 28254, "loss": 0.6871, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.735243908023154e-06, "epoch": 1.42, "percentage": 70.93, "elapsed_time": "1 day, 10:45:11", "remaining_time": "14:14:40"} +{"current_steps": 20050, "total_steps": 28254, "loss": 0.7289, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.71323899781616e-06, "epoch": 1.42, "percentage": 70.96, "elapsed_time": "1 day, 10:46:15", "remaining_time": "14:13:38"} +{"current_steps": 20060, "total_steps": 28254, "loss": 0.6958, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.691252987325886e-06, "epoch": 1.42, "percentage": 71.0, "elapsed_time": "1 day, 10:47:17", "remaining_time": "14:12:36"} +{"current_steps": 20070, "total_steps": 28254, "loss": 0.7123, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.669285903734632e-06, "epoch": 1.42, "percentage": 71.03, "elapsed_time": "1 day, 10:48:16", "remaining_time": "14:11:32"} +{"current_steps": 20080, "total_steps": 28254, "loss": 0.7123, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.647337774201312e-06, "epoch": 1.42, "percentage": 71.07, "elapsed_time": "1 day, 10:49:19", "remaining_time": "14:10:30"} +{"current_steps": 20090, "total_steps": 28254, "loss": 0.7064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.625408625861387e-06, "epoch": 1.42, "percentage": 71.1, "elapsed_time": "1 day, 10:50:20", "remaining_time": "14:09:27"} +{"current_steps": 20100, "total_steps": 28254, "loss": 0.7086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.603498485826848e-06, "epoch": 1.42, "percentage": 71.14, "elapsed_time": "1 day, 10:51:23", "remaining_time": "14:08:25"} +{"current_steps": 20110, "total_steps": 28254, "loss": 0.7247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.581607381186203e-06, "epoch": 1.42, "percentage": 71.18, "elapsed_time": "1 day, 10:52:27", "remaining_time": "14:07:23"} +{"current_steps": 20120, "total_steps": 28254, "loss": 0.7389, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.559735339004434e-06, "epoch": 1.42, "percentage": 71.21, "elapsed_time": "1 day, 10:53:32", "remaining_time": "14:06:21"} +{"current_steps": 20130, "total_steps": 28254, "loss": 0.7298, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.537882386322921e-06, "epoch": 1.42, "percentage": 71.25, "elapsed_time": "1 day, 10:54:34", "remaining_time": "14:05:19"} +{"current_steps": 20140, "total_steps": 28254, "loss": 0.7032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.516048550159463e-06, "epoch": 1.43, "percentage": 71.28, "elapsed_time": "1 day, 10:55:36", "remaining_time": "14:04:16"} +{"current_steps": 20150, "total_steps": 28254, "loss": 0.717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.494233857508227e-06, "epoch": 1.43, "percentage": 71.32, "elapsed_time": "1 day, 10:56:39", "remaining_time": "14:03:14"} +{"current_steps": 20160, "total_steps": 28254, "loss": 0.7182, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.472438335339717e-06, "epoch": 1.43, "percentage": 71.35, "elapsed_time": "1 day, 10:57:43", "remaining_time": "14:02:12"} +{"current_steps": 20170, "total_steps": 28254, "loss": 0.7044, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.450662010600716e-06, "epoch": 1.43, "percentage": 71.39, "elapsed_time": "1 day, 10:58:46", "remaining_time": "14:01:10"} +{"current_steps": 20180, "total_steps": 28254, "loss": 0.723, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.428904910214278e-06, "epoch": 1.43, "percentage": 71.42, "elapsed_time": "1 day, 10:59:50", "remaining_time": "14:00:08"} +{"current_steps": 20190, "total_steps": 28254, "loss": 0.6971, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.407167061079702e-06, "epoch": 1.43, "percentage": 71.46, "elapsed_time": "1 day, 11:00:53", "remaining_time": "13:59:06"} +{"current_steps": 20200, "total_steps": 28254, "loss": 0.6989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.385448490072485e-06, "epoch": 1.43, "percentage": 71.49, "elapsed_time": "1 day, 11:01:54", "remaining_time": "13:58:03"} +{"current_steps": 20210, "total_steps": 28254, "loss": 0.7097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.363749224044274e-06, "epoch": 1.43, "percentage": 71.53, "elapsed_time": "1 day, 11:02:55", "remaining_time": "13:57:00"} +{"current_steps": 20220, "total_steps": 28254, "loss": 0.7078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.342069289822852e-06, "epoch": 1.43, "percentage": 71.57, "elapsed_time": "1 day, 11:04:00", "remaining_time": "13:55:58"} +{"current_steps": 20230, "total_steps": 28254, "loss": 0.7118, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.32040871421211e-06, "epoch": 1.43, "percentage": 71.6, "elapsed_time": "1 day, 11:05:02", "remaining_time": "13:54:56"} +{"current_steps": 20240, "total_steps": 28254, "loss": 0.7372, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.298767523991999e-06, "epoch": 1.43, "percentage": 71.64, "elapsed_time": "1 day, 11:06:05", "remaining_time": "13:53:54"} +{"current_steps": 20250, "total_steps": 28254, "loss": 0.707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.277145745918528e-06, "epoch": 1.43, "percentage": 71.67, "elapsed_time": "1 day, 11:07:07", "remaining_time": "13:52:51"} +{"current_steps": 20260, "total_steps": 28254, "loss": 0.7034, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.25554340672365e-06, "epoch": 1.43, "percentage": 71.71, "elapsed_time": "1 day, 11:08:10", "remaining_time": "13:51:49"} +{"current_steps": 20270, "total_steps": 28254, "loss": 0.7151, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.233960533115326e-06, "epoch": 1.43, "percentage": 71.74, "elapsed_time": "1 day, 11:09:10", "remaining_time": "13:50:46"} +{"current_steps": 20280, "total_steps": 28254, "loss": 0.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.212397151777449e-06, "epoch": 1.44, "percentage": 71.78, "elapsed_time": "1 day, 11:10:14", "remaining_time": "13:49:44"} +{"current_steps": 20290, "total_steps": 28254, "loss": 0.6909, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.190853289369825e-06, "epoch": 1.44, "percentage": 71.81, "elapsed_time": "1 day, 11:11:18", "remaining_time": "13:48:42"} +{"current_steps": 20300, "total_steps": 28254, "loss": 0.7325, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.169328972528072e-06, "epoch": 1.44, "percentage": 71.85, "elapsed_time": "1 day, 11:12:22", "remaining_time": "13:47:40"} +{"current_steps": 20310, "total_steps": 28254, "loss": 0.6977, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.147824227863697e-06, "epoch": 1.44, "percentage": 71.88, "elapsed_time": "1 day, 11:13:24", "remaining_time": "13:46:37"} +{"current_steps": 20320, "total_steps": 28254, "loss": 0.7079, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.126339081963995e-06, "epoch": 1.44, "percentage": 71.92, "elapsed_time": "1 day, 11:14:26", "remaining_time": "13:45:35"} +{"current_steps": 20330, "total_steps": 28254, "loss": 0.6974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.104873561392032e-06, "epoch": 1.44, "percentage": 71.95, "elapsed_time": "1 day, 11:15:26", "remaining_time": "13:44:32"} +{"current_steps": 20340, "total_steps": 28254, "loss": 0.7094, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.0834276926866e-06, "epoch": 1.44, "percentage": 71.99, "elapsed_time": "1 day, 11:16:27", "remaining_time": "13:43:28"} +{"current_steps": 20350, "total_steps": 28254, "loss": 0.7133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.062001502362192e-06, "epoch": 1.44, "percentage": 72.03, "elapsed_time": "1 day, 11:17:31", "remaining_time": "13:42:27"} +{"current_steps": 20360, "total_steps": 28254, "loss": 0.7142, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.040595016908988e-06, "epoch": 1.44, "percentage": 72.06, "elapsed_time": "1 day, 11:18:31", "remaining_time": "13:41:23"} +{"current_steps": 20370, "total_steps": 28254, "loss": 0.6902, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.019208262792802e-06, "epoch": 1.44, "percentage": 72.1, "elapsed_time": "1 day, 11:19:35", "remaining_time": "13:40:21"} +{"current_steps": 20380, "total_steps": 28254, "loss": 0.7239, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.997841266455048e-06, "epoch": 1.44, "percentage": 72.13, "elapsed_time": "1 day, 11:20:38", "remaining_time": "13:39:19"} +{"current_steps": 20390, "total_steps": 28254, "loss": 0.7354, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.976494054312701e-06, "epoch": 1.44, "percentage": 72.17, "elapsed_time": "1 day, 11:21:42", "remaining_time": "13:38:17"} +{"current_steps": 20400, "total_steps": 28254, "loss": 0.719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.955166652758298e-06, "epoch": 1.44, "percentage": 72.2, "elapsed_time": "1 day, 11:22:44", "remaining_time": "13:37:15"} +{"current_steps": 20410, "total_steps": 28254, "loss": 0.6968, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.933859088159884e-06, "epoch": 1.44, "percentage": 72.24, "elapsed_time": "1 day, 11:23:48", "remaining_time": "13:36:13"} +{"current_steps": 20420, "total_steps": 28254, "loss": 0.7093, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.912571386860958e-06, "epoch": 1.45, "percentage": 72.27, "elapsed_time": "1 day, 11:24:48", "remaining_time": "13:35:10"} +{"current_steps": 20430, "total_steps": 28254, "loss": 0.6914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.891303575180463e-06, "epoch": 1.45, "percentage": 72.31, "elapsed_time": "1 day, 11:25:52", "remaining_time": "13:34:08"} +{"current_steps": 20440, "total_steps": 28254, "loss": 0.689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.870055679412767e-06, "epoch": 1.45, "percentage": 72.34, "elapsed_time": "1 day, 11:26:55", "remaining_time": "13:33:06"} +{"current_steps": 20450, "total_steps": 28254, "loss": 0.7132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.848827725827621e-06, "epoch": 1.45, "percentage": 72.38, "elapsed_time": "1 day, 11:27:57", "remaining_time": "13:32:03"} +{"current_steps": 20460, "total_steps": 28254, "loss": 0.6924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.827619740670099e-06, "epoch": 1.45, "percentage": 72.41, "elapsed_time": "1 day, 11:28:58", "remaining_time": "13:31:00"} +{"current_steps": 20470, "total_steps": 28254, "loss": 0.7063, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.806431750160585e-06, "epoch": 1.45, "percentage": 72.45, "elapsed_time": "1 day, 11:29:59", "remaining_time": "13:29:57"} +{"current_steps": 20480, "total_steps": 28254, "loss": 0.6989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.785263780494763e-06, "epoch": 1.45, "percentage": 72.49, "elapsed_time": "1 day, 11:31:02", "remaining_time": "13:28:55"} +{"current_steps": 20490, "total_steps": 28254, "loss": 0.6888, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.764115857843555e-06, "epoch": 1.45, "percentage": 72.52, "elapsed_time": "1 day, 11:32:05", "remaining_time": "13:27:53"} +{"current_steps": 20500, "total_steps": 28254, "loss": 0.7094, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.742988008353115e-06, "epoch": 1.45, "percentage": 72.56, "elapsed_time": "1 day, 11:33:07", "remaining_time": "13:26:50"} +{"current_steps": 20510, "total_steps": 28254, "loss": 0.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.72188025814473e-06, "epoch": 1.45, "percentage": 72.59, "elapsed_time": "1 day, 11:34:10", "remaining_time": "13:25:48"} +{"current_steps": 20520, "total_steps": 28254, "loss": 0.7406, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.700792633314886e-06, "epoch": 1.45, "percentage": 72.63, "elapsed_time": "1 day, 11:35:12", "remaining_time": "13:24:45"} +{"current_steps": 20530, "total_steps": 28254, "loss": 0.6906, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.67972515993517e-06, "epoch": 1.45, "percentage": 72.66, "elapsed_time": "1 day, 11:36:13", "remaining_time": "13:23:42"} +{"current_steps": 20540, "total_steps": 28254, "loss": 0.7051, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.658677864052264e-06, "epoch": 1.45, "percentage": 72.7, "elapsed_time": "1 day, 11:37:16", "remaining_time": "13:22:40"} +{"current_steps": 20550, "total_steps": 28254, "loss": 0.683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.637650771687891e-06, "epoch": 1.45, "percentage": 72.73, "elapsed_time": "1 day, 11:38:20", "remaining_time": "13:21:38"} +{"current_steps": 20560, "total_steps": 28254, "loss": 0.6955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.616643908838787e-06, "epoch": 1.46, "percentage": 72.77, "elapsed_time": "1 day, 11:39:24", "remaining_time": "13:20:36"} +{"current_steps": 20570, "total_steps": 28254, "loss": 0.6916, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.595657301476704e-06, "epoch": 1.46, "percentage": 72.8, "elapsed_time": "1 day, 11:40:24", "remaining_time": "13:19:33"} +{"current_steps": 20580, "total_steps": 28254, "loss": 0.7069, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.574690975548339e-06, "epoch": 1.46, "percentage": 72.84, "elapsed_time": "1 day, 11:41:26", "remaining_time": "13:18:30"} +{"current_steps": 20590, "total_steps": 28254, "loss": 0.7208, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.55374495697531e-06, "epoch": 1.46, "percentage": 72.87, "elapsed_time": "1 day, 11:42:29", "remaining_time": "13:17:28"} +{"current_steps": 20600, "total_steps": 28254, "loss": 0.7038, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.53281927165412e-06, "epoch": 1.46, "percentage": 72.91, "elapsed_time": "1 day, 11:43:30", "remaining_time": "13:16:25"} +{"current_steps": 20610, "total_steps": 28254, "loss": 0.6982, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.51191394545615e-06, "epoch": 1.46, "percentage": 72.95, "elapsed_time": "1 day, 11:44:33", "remaining_time": "13:15:23"} +{"current_steps": 20620, "total_steps": 28254, "loss": 0.6804, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.49102900422762e-06, "epoch": 1.46, "percentage": 72.98, "elapsed_time": "1 day, 11:45:41", "remaining_time": "13:14:22"} +{"current_steps": 20630, "total_steps": 28254, "loss": 0.6846, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.470164473789516e-06, "epoch": 1.46, "percentage": 73.02, "elapsed_time": "1 day, 11:46:43", "remaining_time": "13:13:20"} +{"current_steps": 20640, "total_steps": 28254, "loss": 0.729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.449320379937594e-06, "epoch": 1.46, "percentage": 73.05, "elapsed_time": "1 day, 11:47:44", "remaining_time": "13:12:17"} +{"current_steps": 20650, "total_steps": 28254, "loss": 0.6942, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.428496748442371e-06, "epoch": 1.46, "percentage": 73.09, "elapsed_time": "1 day, 11:48:48", "remaining_time": "13:11:15"} +{"current_steps": 20660, "total_steps": 28254, "loss": 0.7044, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.40769360504905e-06, "epoch": 1.46, "percentage": 73.12, "elapsed_time": "1 day, 11:49:50", "remaining_time": "13:10:13"} +{"current_steps": 20670, "total_steps": 28254, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.386910975477494e-06, "epoch": 1.46, "percentage": 73.16, "elapsed_time": "1 day, 11:50:54", "remaining_time": "13:09:11"} +{"current_steps": 20680, "total_steps": 28254, "loss": 0.7018, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.366148885422204e-06, "epoch": 1.46, "percentage": 73.19, "elapsed_time": "1 day, 11:51:56", "remaining_time": "13:08:08"} +{"current_steps": 20690, "total_steps": 28254, "loss": 0.7247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.345407360552302e-06, "epoch": 1.46, "percentage": 73.23, "elapsed_time": "1 day, 11:52:59", "remaining_time": "13:07:06"} +{"current_steps": 20700, "total_steps": 28254, "loss": 0.698, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.324686426511486e-06, "epoch": 1.47, "percentage": 73.26, "elapsed_time": "1 day, 11:54:00", "remaining_time": "13:06:03"} +{"current_steps": 20710, "total_steps": 28254, "loss": 0.7123, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.30398610891798e-06, "epoch": 1.47, "percentage": 73.3, "elapsed_time": "1 day, 11:55:04", "remaining_time": "13:05:01"} +{"current_steps": 20720, "total_steps": 28254, "loss": 0.7027, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.283306433364518e-06, "epoch": 1.47, "percentage": 73.33, "elapsed_time": "1 day, 11:56:04", "remaining_time": "13:03:58"} +{"current_steps": 20730, "total_steps": 28254, "loss": 0.699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.26264742541833e-06, "epoch": 1.47, "percentage": 73.37, "elapsed_time": "1 day, 11:57:03", "remaining_time": "13:02:54"} +{"current_steps": 20740, "total_steps": 28254, "loss": 0.7091, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.242009110621085e-06, "epoch": 1.47, "percentage": 73.41, "elapsed_time": "1 day, 11:58:06", "remaining_time": "13:01:52"} +{"current_steps": 20750, "total_steps": 28254, "loss": 0.6848, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.221391514488885e-06, "epoch": 1.47, "percentage": 73.44, "elapsed_time": "1 day, 11:59:08", "remaining_time": "13:00:49"} +{"current_steps": 20760, "total_steps": 28254, "loss": 0.6872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.200794662512168e-06, "epoch": 1.47, "percentage": 73.48, "elapsed_time": "1 day, 12:00:10", "remaining_time": "12:59:47"} +{"current_steps": 20770, "total_steps": 28254, "loss": 0.6894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.180218580155774e-06, "epoch": 1.47, "percentage": 73.51, "elapsed_time": "1 day, 12:01:12", "remaining_time": "12:58:44"} +{"current_steps": 20780, "total_steps": 28254, "loss": 0.7007, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.159663292858846e-06, "epoch": 1.47, "percentage": 73.55, "elapsed_time": "1 day, 12:02:13", "remaining_time": "12:57:41"} +{"current_steps": 20790, "total_steps": 28254, "loss": 0.7175, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.13912882603483e-06, "epoch": 1.47, "percentage": 73.58, "elapsed_time": "1 day, 12:03:18", "remaining_time": "12:56:40"} +{"current_steps": 20800, "total_steps": 28254, "loss": 0.7258, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.118615205071411e-06, "epoch": 1.47, "percentage": 73.62, "elapsed_time": "1 day, 12:04:19", "remaining_time": "12:55:37"} +{"current_steps": 20810, "total_steps": 28254, "loss": 0.7141, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.098122455330497e-06, "epoch": 1.47, "percentage": 73.65, "elapsed_time": "1 day, 12:05:23", "remaining_time": "12:54:35"} +{"current_steps": 20820, "total_steps": 28254, "loss": 0.7014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.077650602148221e-06, "epoch": 1.47, "percentage": 73.69, "elapsed_time": "1 day, 12:06:27", "remaining_time": "12:53:33"} +{"current_steps": 20830, "total_steps": 28254, "loss": 0.6966, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.057199670834867e-06, "epoch": 1.47, "percentage": 73.72, "elapsed_time": "1 day, 12:07:31", "remaining_time": "12:52:31"} +{"current_steps": 20840, "total_steps": 28254, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.036769686674844e-06, "epoch": 1.48, "percentage": 73.76, "elapsed_time": "1 day, 12:08:34", "remaining_time": "12:51:29"} +{"current_steps": 20850, "total_steps": 28254, "loss": 0.7032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.016360674926663e-06, "epoch": 1.48, "percentage": 73.79, "elapsed_time": "1 day, 12:09:37", "remaining_time": "12:50:27"} +{"current_steps": 20860, "total_steps": 28254, "loss": 0.7441, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.995972660822914e-06, "epoch": 1.48, "percentage": 73.83, "elapsed_time": "1 day, 12:10:40", "remaining_time": "12:49:24"} +{"current_steps": 20870, "total_steps": 28254, "loss": 0.6719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.975605669570235e-06, "epoch": 1.48, "percentage": 73.87, "elapsed_time": "1 day, 12:11:41", "remaining_time": "12:48:21"} +{"current_steps": 20880, "total_steps": 28254, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.95525972634926e-06, "epoch": 1.48, "percentage": 73.9, "elapsed_time": "1 day, 12:12:43", "remaining_time": "12:47:19"} +{"current_steps": 20890, "total_steps": 28254, "loss": 0.7079, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.934934856314586e-06, "epoch": 1.48, "percentage": 73.94, "elapsed_time": "1 day, 12:13:45", "remaining_time": "12:46:16"} +{"current_steps": 20900, "total_steps": 28254, "loss": 0.693, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.914631084594783e-06, "epoch": 1.48, "percentage": 73.97, "elapsed_time": "1 day, 12:14:48", "remaining_time": "12:45:14"} +{"current_steps": 20910, "total_steps": 28254, "loss": 0.7302, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.89434843629234e-06, "epoch": 1.48, "percentage": 74.01, "elapsed_time": "1 day, 12:15:51", "remaining_time": "12:44:12"} +{"current_steps": 20920, "total_steps": 28254, "loss": 0.6851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.874086936483599e-06, "epoch": 1.48, "percentage": 74.04, "elapsed_time": "1 day, 12:16:53", "remaining_time": "12:43:09"} +{"current_steps": 20930, "total_steps": 28254, "loss": 0.7151, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.853846610218771e-06, "epoch": 1.48, "percentage": 74.08, "elapsed_time": "1 day, 12:17:55", "remaining_time": "12:42:06"} +{"current_steps": 20940, "total_steps": 28254, "loss": 0.7283, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.833627482521893e-06, "epoch": 1.48, "percentage": 74.11, "elapsed_time": "1 day, 12:18:57", "remaining_time": "12:41:04"} +{"current_steps": 20950, "total_steps": 28254, "loss": 0.726, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.813429578390801e-06, "epoch": 1.48, "percentage": 74.15, "elapsed_time": "1 day, 12:20:01", "remaining_time": "12:40:02"} +{"current_steps": 20960, "total_steps": 28254, "loss": 0.6808, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.793252922797075e-06, "epoch": 1.48, "percentage": 74.18, "elapsed_time": "1 day, 12:21:04", "remaining_time": "12:39:00"} +{"current_steps": 20970, "total_steps": 28254, "loss": 0.7085, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.773097540686023e-06, "epoch": 1.48, "percentage": 74.22, "elapsed_time": "1 day, 12:22:07", "remaining_time": "12:37:58"} +{"current_steps": 20980, "total_steps": 28254, "loss": 0.6917, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.752963456976661e-06, "epoch": 1.49, "percentage": 74.25, "elapsed_time": "1 day, 12:23:10", "remaining_time": "12:36:55"} +{"current_steps": 20990, "total_steps": 28254, "loss": 0.7309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.732850696561683e-06, "epoch": 1.49, "percentage": 74.29, "elapsed_time": "1 day, 12:24:14", "remaining_time": "12:35:53"} +{"current_steps": 21000, "total_steps": 28254, "loss": 0.7005, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.7127592843074e-06, "epoch": 1.49, "percentage": 74.33, "elapsed_time": "1 day, 12:25:18", "remaining_time": "12:34:52"} +{"current_steps": 21010, "total_steps": 28254, "loss": 0.6843, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.692689245053728e-06, "epoch": 1.49, "percentage": 74.36, "elapsed_time": "1 day, 12:26:21", "remaining_time": "12:33:49"} +{"current_steps": 21020, "total_steps": 28254, "loss": 0.7116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.672640603614179e-06, "epoch": 1.49, "percentage": 74.4, "elapsed_time": "1 day, 12:27:23", "remaining_time": "12:32:47"} +{"current_steps": 21030, "total_steps": 28254, "loss": 0.7229, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.652613384775791e-06, "epoch": 1.49, "percentage": 74.43, "elapsed_time": "1 day, 12:28:22", "remaining_time": "12:31:43"} +{"current_steps": 21040, "total_steps": 28254, "loss": 0.7032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.632607613299142e-06, "epoch": 1.49, "percentage": 74.47, "elapsed_time": "1 day, 12:29:24", "remaining_time": "12:30:41"} +{"current_steps": 21050, "total_steps": 28254, "loss": 0.7184, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.612623313918263e-06, "epoch": 1.49, "percentage": 74.5, "elapsed_time": "1 day, 12:30:27", "remaining_time": "12:29:38"} +{"current_steps": 21060, "total_steps": 28254, "loss": 0.7004, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.592660511340641e-06, "epoch": 1.49, "percentage": 74.54, "elapsed_time": "1 day, 12:31:30", "remaining_time": "12:28:36"} +{"current_steps": 21070, "total_steps": 28254, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.572719230247205e-06, "epoch": 1.49, "percentage": 74.57, "elapsed_time": "1 day, 12:32:33", "remaining_time": "12:27:34"} +{"current_steps": 21080, "total_steps": 28254, "loss": 0.6928, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.552799495292273e-06, "epoch": 1.49, "percentage": 74.61, "elapsed_time": "1 day, 12:33:36", "remaining_time": "12:26:32"} +{"current_steps": 21090, "total_steps": 28254, "loss": 0.686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.532901331103512e-06, "epoch": 1.49, "percentage": 74.64, "elapsed_time": "1 day, 12:34:38", "remaining_time": "12:25:29"} +{"current_steps": 21100, "total_steps": 28254, "loss": 0.7178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.513024762281914e-06, "epoch": 1.49, "percentage": 74.68, "elapsed_time": "1 day, 12:35:44", "remaining_time": "12:24:28"} +{"current_steps": 21110, "total_steps": 28254, "loss": 0.6919, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.493169813401799e-06, "epoch": 1.49, "percentage": 74.72, "elapsed_time": "1 day, 12:36:46", "remaining_time": "12:23:25"} +{"current_steps": 21120, "total_steps": 28254, "loss": 0.7132, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.473336509010742e-06, "epoch": 1.49, "percentage": 74.75, "elapsed_time": "1 day, 12:37:49", "remaining_time": "12:22:23"} +{"current_steps": 21130, "total_steps": 28254, "loss": 0.7174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.453524873629553e-06, "epoch": 1.5, "percentage": 74.79, "elapsed_time": "1 day, 12:38:53", "remaining_time": "12:21:21"} +{"current_steps": 21140, "total_steps": 28254, "loss": 0.7243, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.4337349317522485e-06, "epoch": 1.5, "percentage": 74.82, "elapsed_time": "1 day, 12:39:56", "remaining_time": "12:20:19"} +{"current_steps": 21150, "total_steps": 28254, "loss": 0.7158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.41396670784604e-06, "epoch": 1.5, "percentage": 74.86, "elapsed_time": "1 day, 12:40:58", "remaining_time": "12:19:16"} +{"current_steps": 21160, "total_steps": 28254, "loss": 0.7116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.394220226351286e-06, "epoch": 1.5, "percentage": 74.89, "elapsed_time": "1 day, 12:42:00", "remaining_time": "12:18:14"} +{"current_steps": 21170, "total_steps": 28254, "loss": 0.6906, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.374495511681454e-06, "epoch": 1.5, "percentage": 74.93, "elapsed_time": "1 day, 12:43:03", "remaining_time": "12:17:11"} +{"current_steps": 21180, "total_steps": 28254, "loss": 0.6896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.354792588223094e-06, "epoch": 1.5, "percentage": 74.96, "elapsed_time": "1 day, 12:44:07", "remaining_time": "12:16:09"} +{"current_steps": 21190, "total_steps": 28254, "loss": 0.7078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.3351114803358354e-06, "epoch": 1.5, "percentage": 75.0, "elapsed_time": "1 day, 12:45:09", "remaining_time": "12:15:07"} +{"current_steps": 21200, "total_steps": 28254, "loss": 0.7297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.3154522123523305e-06, "epoch": 1.5, "percentage": 75.03, "elapsed_time": "1 day, 12:46:11", "remaining_time": "12:14:04"} +{"current_steps": 21210, "total_steps": 28254, "loss": 0.6861, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.295814808578216e-06, "epoch": 1.5, "percentage": 75.07, "elapsed_time": "1 day, 12:47:11", "remaining_time": "12:13:01"} +{"current_steps": 21220, "total_steps": 28254, "loss": 0.6985, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.276199293292102e-06, "epoch": 1.5, "percentage": 75.1, "elapsed_time": "1 day, 12:48:15", "remaining_time": "12:11:59"} +{"current_steps": 21230, "total_steps": 28254, "loss": 0.7065, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.256605690745547e-06, "epoch": 1.5, "percentage": 75.14, "elapsed_time": "1 day, 12:49:18", "remaining_time": "12:10:57"} +{"current_steps": 21240, "total_steps": 28254, "loss": 0.7173, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.237034025163017e-06, "epoch": 1.5, "percentage": 75.18, "elapsed_time": "1 day, 12:50:19", "remaining_time": "12:09:54"} +{"current_steps": 21250, "total_steps": 28254, "loss": 0.7191, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.217484320741838e-06, "epoch": 1.5, "percentage": 75.21, "elapsed_time": "1 day, 12:51:22", "remaining_time": "12:08:52"} +{"current_steps": 21260, "total_steps": 28254, "loss": 0.7349, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.197956601652212e-06, "epoch": 1.5, "percentage": 75.25, "elapsed_time": "1 day, 12:52:25", "remaining_time": "12:07:50"} +{"current_steps": 21270, "total_steps": 28254, "loss": 0.6995, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.178450892037128e-06, "epoch": 1.51, "percentage": 75.28, "elapsed_time": "1 day, 12:53:26", "remaining_time": "12:06:47"} +{"current_steps": 21280, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.158967216012396e-06, "epoch": 1.51, "percentage": 75.32, "elapsed_time": "1 day, 12:54:31", "remaining_time": "12:05:45"} +{"current_steps": 21290, "total_steps": 28254, "loss": 0.6755, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.139505597666557e-06, "epoch": 1.51, "percentage": 75.35, "elapsed_time": "1 day, 12:55:34", "remaining_time": "12:04:43"} +{"current_steps": 21300, "total_steps": 28254, "loss": 0.6743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.120066061060906e-06, "epoch": 1.51, "percentage": 75.39, "elapsed_time": "1 day, 12:56:36", "remaining_time": "12:03:40"} +{"current_steps": 21310, "total_steps": 28254, "loss": 0.7079, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.100648630229412e-06, "epoch": 1.51, "percentage": 75.42, "elapsed_time": "1 day, 12:57:38", "remaining_time": "12:02:38"} +{"current_steps": 21320, "total_steps": 28254, "loss": 0.7348, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.081253329178727e-06, "epoch": 1.51, "percentage": 75.46, "elapsed_time": "1 day, 12:58:40", "remaining_time": "12:01:35"} +{"current_steps": 21330, "total_steps": 28254, "loss": 0.7047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.061880181888158e-06, "epoch": 1.51, "percentage": 75.49, "elapsed_time": "1 day, 12:59:42", "remaining_time": "12:00:32"} +{"current_steps": 21340, "total_steps": 28254, "loss": 0.7129, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.042529212309599e-06, "epoch": 1.51, "percentage": 75.53, "elapsed_time": "1 day, 13:00:45", "remaining_time": "11:59:30"} +{"current_steps": 21350, "total_steps": 28254, "loss": 0.6997, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.023200444367517e-06, "epoch": 1.51, "percentage": 75.56, "elapsed_time": "1 day, 13:01:47", "remaining_time": "11:58:27"} +{"current_steps": 21360, "total_steps": 28254, "loss": 0.731, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.0038939019589605e-06, "epoch": 1.51, "percentage": 75.6, "elapsed_time": "1 day, 13:02:54", "remaining_time": "11:57:26"} +{"current_steps": 21370, "total_steps": 28254, "loss": 0.7097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.984609608953488e-06, "epoch": 1.51, "percentage": 75.64, "elapsed_time": "1 day, 13:03:55", "remaining_time": "11:56:24"} +{"current_steps": 21380, "total_steps": 28254, "loss": 0.7155, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.965347589193141e-06, "epoch": 1.51, "percentage": 75.67, "elapsed_time": "1 day, 13:04:56", "remaining_time": "11:55:21"} +{"current_steps": 21390, "total_steps": 28254, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.9461078664924216e-06, "epoch": 1.51, "percentage": 75.71, "elapsed_time": "1 day, 13:05:58", "remaining_time": "11:54:18"} +{"current_steps": 21400, "total_steps": 28254, "loss": 0.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.926890464638277e-06, "epoch": 1.51, "percentage": 75.74, "elapsed_time": "1 day, 13:07:00", "remaining_time": "11:53:16"} +{"current_steps": 21410, "total_steps": 28254, "loss": 0.7316, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.907695407390066e-06, "epoch": 1.52, "percentage": 75.78, "elapsed_time": "1 day, 13:08:02", "remaining_time": "11:52:13"} +{"current_steps": 21420, "total_steps": 28254, "loss": 0.7124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.888522718479498e-06, "epoch": 1.52, "percentage": 75.81, "elapsed_time": "1 day, 13:09:03", "remaining_time": "11:51:10"} +{"current_steps": 21430, "total_steps": 28254, "loss": 0.7253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.869372421610632e-06, "epoch": 1.52, "percentage": 75.85, "elapsed_time": "1 day, 13:10:07", "remaining_time": "11:50:08"} +{"current_steps": 21440, "total_steps": 28254, "loss": 0.7065, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.85024454045986e-06, "epoch": 1.52, "percentage": 75.88, "elapsed_time": "1 day, 13:11:10", "remaining_time": "11:49:06"} +{"current_steps": 21450, "total_steps": 28254, "loss": 0.7073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.831139098675854e-06, "epoch": 1.52, "percentage": 75.92, "elapsed_time": "1 day, 13:12:12", "remaining_time": "11:48:03"} +{"current_steps": 21460, "total_steps": 28254, "loss": 0.686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.812056119879534e-06, "epoch": 1.52, "percentage": 75.95, "elapsed_time": "1 day, 13:13:15", "remaining_time": "11:47:01"} +{"current_steps": 21470, "total_steps": 28254, "loss": 0.6915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.792995627664042e-06, "epoch": 1.52, "percentage": 75.99, "elapsed_time": "1 day, 13:14:16", "remaining_time": "11:45:58"} +{"current_steps": 21480, "total_steps": 28254, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.773957645594742e-06, "epoch": 1.52, "percentage": 76.02, "elapsed_time": "1 day, 13:15:19", "remaining_time": "11:44:56"} +{"current_steps": 21490, "total_steps": 28254, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.754942197209163e-06, "epoch": 1.52, "percentage": 76.06, "elapsed_time": "1 day, 13:16:21", "remaining_time": "11:43:53"} +{"current_steps": 21500, "total_steps": 28254, "loss": 0.7351, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.7359493060169475e-06, "epoch": 1.52, "percentage": 76.1, "elapsed_time": "1 day, 13:17:22", "remaining_time": "11:42:50"} +{"current_steps": 21510, "total_steps": 28254, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.716978995499887e-06, "epoch": 1.52, "percentage": 76.13, "elapsed_time": "1 day, 13:18:22", "remaining_time": "11:41:47"} +{"current_steps": 21520, "total_steps": 28254, "loss": 0.6966, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.698031289111825e-06, "epoch": 1.52, "percentage": 76.17, "elapsed_time": "1 day, 13:19:25", "remaining_time": "11:40:45"} +{"current_steps": 21530, "total_steps": 28254, "loss": 0.7117, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.679106210278682e-06, "epoch": 1.52, "percentage": 76.2, "elapsed_time": "1 day, 13:20:26", "remaining_time": "11:39:42"} +{"current_steps": 21540, "total_steps": 28254, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.660203782398383e-06, "epoch": 1.52, "percentage": 76.24, "elapsed_time": "1 day, 13:21:29", "remaining_time": "11:38:40"} +{"current_steps": 21550, "total_steps": 28254, "loss": 0.712, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.641324028840865e-06, "epoch": 1.53, "percentage": 76.27, "elapsed_time": "1 day, 13:22:31", "remaining_time": "11:37:37"} +{"current_steps": 21560, "total_steps": 28254, "loss": 0.7014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.622466972948016e-06, "epoch": 1.53, "percentage": 76.31, "elapsed_time": "1 day, 13:23:31", "remaining_time": "11:36:34"} +{"current_steps": 21570, "total_steps": 28254, "loss": 0.7101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.603632638033683e-06, "epoch": 1.53, "percentage": 76.34, "elapsed_time": "1 day, 13:24:34", "remaining_time": "11:35:32"} +{"current_steps": 21580, "total_steps": 28254, "loss": 0.7027, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.584821047383594e-06, "epoch": 1.53, "percentage": 76.38, "elapsed_time": "1 day, 13:25:37", "remaining_time": "11:34:29"} +{"current_steps": 21590, "total_steps": 28254, "loss": 0.7388, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.566032224255389e-06, "epoch": 1.53, "percentage": 76.41, "elapsed_time": "1 day, 13:26:39", "remaining_time": "11:33:27"} +{"current_steps": 21600, "total_steps": 28254, "loss": 0.6844, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.547266191878529e-06, "epoch": 1.53, "percentage": 76.45, "elapsed_time": "1 day, 13:27:39", "remaining_time": "11:32:24"} +{"current_steps": 21610, "total_steps": 28254, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.528522973454315e-06, "epoch": 1.53, "percentage": 76.48, "elapsed_time": "1 day, 13:28:41", "remaining_time": "11:31:21"} +{"current_steps": 21620, "total_steps": 28254, "loss": 0.7233, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.509802592155851e-06, "epoch": 1.53, "percentage": 76.52, "elapsed_time": "1 day, 13:29:45", "remaining_time": "11:30:19"} +{"current_steps": 21630, "total_steps": 28254, "loss": 0.6955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.491105071127984e-06, "epoch": 1.53, "percentage": 76.56, "elapsed_time": "1 day, 13:30:48", "remaining_time": "11:29:17"} +{"current_steps": 21640, "total_steps": 28254, "loss": 0.7329, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.4724304334873e-06, "epoch": 1.53, "percentage": 76.59, "elapsed_time": "1 day, 13:31:50", "remaining_time": "11:28:14"} +{"current_steps": 21650, "total_steps": 28254, "loss": 0.7384, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.453778702322114e-06, "epoch": 1.53, "percentage": 76.63, "elapsed_time": "1 day, 13:32:52", "remaining_time": "11:27:12"} +{"current_steps": 21660, "total_steps": 28254, "loss": 0.6645, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.435149900692411e-06, "epoch": 1.53, "percentage": 76.66, "elapsed_time": "1 day, 13:33:54", "remaining_time": "11:26:09"} +{"current_steps": 21670, "total_steps": 28254, "loss": 0.7142, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.416544051629819e-06, "epoch": 1.53, "percentage": 76.7, "elapsed_time": "1 day, 13:34:58", "remaining_time": "11:25:07"} +{"current_steps": 21680, "total_steps": 28254, "loss": 0.7009, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.397961178137584e-06, "epoch": 1.53, "percentage": 76.73, "elapsed_time": "1 day, 13:36:00", "remaining_time": "11:24:05"} +{"current_steps": 21690, "total_steps": 28254, "loss": 0.6876, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.3794013031905685e-06, "epoch": 1.54, "percentage": 76.77, "elapsed_time": "1 day, 13:37:03", "remaining_time": "11:23:02"} +{"current_steps": 21700, "total_steps": 28254, "loss": 0.7037, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.36086444973519e-06, "epoch": 1.54, "percentage": 76.8, "elapsed_time": "1 day, 13:38:05", "remaining_time": "11:22:00"} +{"current_steps": 21710, "total_steps": 28254, "loss": 0.7337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.342350640689393e-06, "epoch": 1.54, "percentage": 76.84, "elapsed_time": "1 day, 13:39:06", "remaining_time": "11:20:57"} +{"current_steps": 21720, "total_steps": 28254, "loss": 0.7101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.323859898942649e-06, "epoch": 1.54, "percentage": 76.87, "elapsed_time": "1 day, 13:40:09", "remaining_time": "11:19:55"} +{"current_steps": 21730, "total_steps": 28254, "loss": 0.7238, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.305392247355893e-06, "epoch": 1.54, "percentage": 76.91, "elapsed_time": "1 day, 13:41:09", "remaining_time": "11:18:51"} +{"current_steps": 21740, "total_steps": 28254, "loss": 0.7183, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.2869477087615315e-06, "epoch": 1.54, "percentage": 76.94, "elapsed_time": "1 day, 13:42:10", "remaining_time": "11:17:49"} +{"current_steps": 21750, "total_steps": 28254, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.268526305963374e-06, "epoch": 1.54, "percentage": 76.98, "elapsed_time": "1 day, 13:43:12", "remaining_time": "11:16:46"} +{"current_steps": 21760, "total_steps": 28254, "loss": 0.697, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.250128061736646e-06, "epoch": 1.54, "percentage": 77.02, "elapsed_time": "1 day, 13:44:15", "remaining_time": "11:15:44"} +{"current_steps": 21770, "total_steps": 28254, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.231752998827925e-06, "epoch": 1.54, "percentage": 77.05, "elapsed_time": "1 day, 13:45:16", "remaining_time": "11:14:41"} +{"current_steps": 21780, "total_steps": 28254, "loss": 0.7374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.213401139955144e-06, "epoch": 1.54, "percentage": 77.09, "elapsed_time": "1 day, 13:46:18", "remaining_time": "11:13:38"} +{"current_steps": 21790, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.195072507807529e-06, "epoch": 1.54, "percentage": 77.12, "elapsed_time": "1 day, 13:47:19", "remaining_time": "11:12:36"} +{"current_steps": 21800, "total_steps": 28254, "loss": 0.6946, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.17676712504561e-06, "epoch": 1.54, "percentage": 77.16, "elapsed_time": "1 day, 13:48:22", "remaining_time": "11:11:33"} +{"current_steps": 21810, "total_steps": 28254, "loss": 0.7179, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.1584850143011546e-06, "epoch": 1.54, "percentage": 77.19, "elapsed_time": "1 day, 13:49:24", "remaining_time": "11:10:31"} +{"current_steps": 21820, "total_steps": 28254, "loss": 0.6801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.140226198177176e-06, "epoch": 1.54, "percentage": 77.23, "elapsed_time": "1 day, 13:50:27", "remaining_time": "11:09:28"} +{"current_steps": 21830, "total_steps": 28254, "loss": 0.7136, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.121990699247865e-06, "epoch": 1.55, "percentage": 77.26, "elapsed_time": "1 day, 13:51:31", "remaining_time": "11:08:27"} +{"current_steps": 21840, "total_steps": 28254, "loss": 0.7195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.103778540058611e-06, "epoch": 1.55, "percentage": 77.3, "elapsed_time": "1 day, 13:52:33", "remaining_time": "11:07:24"} +{"current_steps": 21850, "total_steps": 28254, "loss": 0.683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.085589743125919e-06, "epoch": 1.55, "percentage": 77.33, "elapsed_time": "1 day, 13:53:37", "remaining_time": "11:06:22"} +{"current_steps": 21860, "total_steps": 28254, "loss": 0.7171, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.067424330937438e-06, "epoch": 1.55, "percentage": 77.37, "elapsed_time": "1 day, 13:54:42", "remaining_time": "11:05:20"} +{"current_steps": 21870, "total_steps": 28254, "loss": 0.7437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.0492823259518795e-06, "epoch": 1.55, "percentage": 77.4, "elapsed_time": "1 day, 13:55:44", "remaining_time": "11:04:18"} +{"current_steps": 21880, "total_steps": 28254, "loss": 0.6891, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.0311637505990394e-06, "epoch": 1.55, "percentage": 77.44, "elapsed_time": "1 day, 13:56:47", "remaining_time": "11:03:16"} +{"current_steps": 21890, "total_steps": 28254, "loss": 0.7259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.013068627279725e-06, "epoch": 1.55, "percentage": 77.48, "elapsed_time": "1 day, 13:57:51", "remaining_time": "11:02:13"} +{"current_steps": 21900, "total_steps": 28254, "loss": 0.7382, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.994996978365763e-06, "epoch": 1.55, "percentage": 77.51, "elapsed_time": "1 day, 13:58:53", "remaining_time": "11:01:11"} +{"current_steps": 21910, "total_steps": 28254, "loss": 0.7512, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.97694882619996e-06, "epoch": 1.55, "percentage": 77.55, "elapsed_time": "1 day, 13:59:56", "remaining_time": "11:00:09"} +{"current_steps": 21920, "total_steps": 28254, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.9589241930960635e-06, "epoch": 1.55, "percentage": 77.58, "elapsed_time": "1 day, 14:01:00", "remaining_time": "10:59:07"} +{"current_steps": 21930, "total_steps": 28254, "loss": 0.7125, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.940923101338733e-06, "epoch": 1.55, "percentage": 77.62, "elapsed_time": "1 day, 14:02:01", "remaining_time": "10:58:04"} +{"current_steps": 21940, "total_steps": 28254, "loss": 0.707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.922945573183544e-06, "epoch": 1.55, "percentage": 77.65, "elapsed_time": "1 day, 14:03:05", "remaining_time": "10:57:02"} +{"current_steps": 21950, "total_steps": 28254, "loss": 0.706, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.90499163085694e-06, "epoch": 1.55, "percentage": 77.69, "elapsed_time": "1 day, 14:04:06", "remaining_time": "10:55:59"} +{"current_steps": 21960, "total_steps": 28254, "loss": 0.7613, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.887061296556179e-06, "epoch": 1.55, "percentage": 77.72, "elapsed_time": "1 day, 14:05:10", "remaining_time": "10:54:57"} +{"current_steps": 21970, "total_steps": 28254, "loss": 0.751, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.869154592449364e-06, "epoch": 1.56, "percentage": 77.76, "elapsed_time": "1 day, 14:06:13", "remaining_time": "10:53:55"} +{"current_steps": 21980, "total_steps": 28254, "loss": 0.7164, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.8512715406753486e-06, "epoch": 1.56, "percentage": 77.79, "elapsed_time": "1 day, 14:07:16", "remaining_time": "10:52:52"} +{"current_steps": 21990, "total_steps": 28254, "loss": 0.7117, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.8334121633437794e-06, "epoch": 1.56, "percentage": 77.83, "elapsed_time": "1 day, 14:08:19", "remaining_time": "10:51:50"} +{"current_steps": 22000, "total_steps": 28254, "loss": 0.7227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.815576482534999e-06, "epoch": 1.56, "percentage": 77.87, "elapsed_time": "1 day, 14:09:22", "remaining_time": "10:50:48"} +{"current_steps": 22010, "total_steps": 28254, "loss": 0.687, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.797764520300083e-06, "epoch": 1.56, "percentage": 77.9, "elapsed_time": "1 day, 14:10:25", "remaining_time": "10:49:46"} +{"current_steps": 22020, "total_steps": 28254, "loss": 0.6959, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.7799762986607585e-06, "epoch": 1.56, "percentage": 77.94, "elapsed_time": "1 day, 14:11:28", "remaining_time": "10:48:43"} +{"current_steps": 22030, "total_steps": 28254, "loss": 0.6949, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.762211839609424e-06, "epoch": 1.56, "percentage": 77.97, "elapsed_time": "1 day, 14:12:30", "remaining_time": "10:47:41"} +{"current_steps": 22040, "total_steps": 28254, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.744471165109069e-06, "epoch": 1.56, "percentage": 78.01, "elapsed_time": "1 day, 14:13:33", "remaining_time": "10:46:39"} +{"current_steps": 22050, "total_steps": 28254, "loss": 0.718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.726754297093315e-06, "epoch": 1.56, "percentage": 78.04, "elapsed_time": "1 day, 14:14:36", "remaining_time": "10:45:36"} +{"current_steps": 22060, "total_steps": 28254, "loss": 0.7166, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.709061257466314e-06, "epoch": 1.56, "percentage": 78.08, "elapsed_time": "1 day, 14:15:38", "remaining_time": "10:44:34"} +{"current_steps": 22070, "total_steps": 28254, "loss": 0.6881, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.691392068102786e-06, "epoch": 1.56, "percentage": 78.11, "elapsed_time": "1 day, 14:16:39", "remaining_time": "10:43:31"} +{"current_steps": 22080, "total_steps": 28254, "loss": 0.7015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.673746750847938e-06, "epoch": 1.56, "percentage": 78.15, "elapsed_time": "1 day, 14:17:42", "remaining_time": "10:42:29"} +{"current_steps": 22090, "total_steps": 28254, "loss": 0.7148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.656125327517495e-06, "epoch": 1.56, "percentage": 78.18, "elapsed_time": "1 day, 14:18:45", "remaining_time": "10:41:26"} +{"current_steps": 22100, "total_steps": 28254, "loss": 0.7374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.638527819897607e-06, "epoch": 1.56, "percentage": 78.22, "elapsed_time": "1 day, 14:19:47", "remaining_time": "10:40:24"} +{"current_steps": 22110, "total_steps": 28254, "loss": 0.6898, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.620954249744884e-06, "epoch": 1.57, "percentage": 78.25, "elapsed_time": "1 day, 14:20:50", "remaining_time": "10:39:22"} +{"current_steps": 22120, "total_steps": 28254, "loss": 0.7184, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.6034046387863165e-06, "epoch": 1.57, "percentage": 78.29, "elapsed_time": "1 day, 14:21:53", "remaining_time": "10:38:19"} +{"current_steps": 22130, "total_steps": 28254, "loss": 0.7096, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.585879008719297e-06, "epoch": 1.57, "percentage": 78.33, "elapsed_time": "1 day, 14:22:55", "remaining_time": "10:37:17"} +{"current_steps": 22140, "total_steps": 28254, "loss": 0.6917, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.568377381211548e-06, "epoch": 1.57, "percentage": 78.36, "elapsed_time": "1 day, 14:23:59", "remaining_time": "10:36:15"} +{"current_steps": 22150, "total_steps": 28254, "loss": 0.7112, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.550899777901136e-06, "epoch": 1.57, "percentage": 78.4, "elapsed_time": "1 day, 14:25:03", "remaining_time": "10:35:13"} +{"current_steps": 22160, "total_steps": 28254, "loss": 0.7252, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.533446220396404e-06, "epoch": 1.57, "percentage": 78.43, "elapsed_time": "1 day, 14:26:08", "remaining_time": "10:34:11"} +{"current_steps": 22170, "total_steps": 28254, "loss": 0.664, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.5160167302759884e-06, "epoch": 1.57, "percentage": 78.47, "elapsed_time": "1 day, 14:27:11", "remaining_time": "10:33:08"} +{"current_steps": 22180, "total_steps": 28254, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.498611329088751e-06, "epoch": 1.57, "percentage": 78.5, "elapsed_time": "1 day, 14:28:13", "remaining_time": "10:32:06"} +{"current_steps": 22190, "total_steps": 28254, "loss": 0.7, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.481230038353782e-06, "epoch": 1.57, "percentage": 78.54, "elapsed_time": "1 day, 14:29:16", "remaining_time": "10:31:04"} +{"current_steps": 22200, "total_steps": 28254, "loss": 0.7235, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.463872879560366e-06, "epoch": 1.57, "percentage": 78.57, "elapsed_time": "1 day, 14:30:19", "remaining_time": "10:30:01"} +{"current_steps": 22210, "total_steps": 28254, "loss": 0.6844, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.4465398741679386e-06, "epoch": 1.57, "percentage": 78.61, "elapsed_time": "1 day, 14:31:22", "remaining_time": "10:28:59"} +{"current_steps": 22220, "total_steps": 28254, "loss": 0.7504, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.42923104360609e-06, "epoch": 1.57, "percentage": 78.64, "elapsed_time": "1 day, 14:32:25", "remaining_time": "10:27:57"} +{"current_steps": 22230, "total_steps": 28254, "loss": 0.6676, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.411946409274501e-06, "epoch": 1.57, "percentage": 78.68, "elapsed_time": "1 day, 14:33:28", "remaining_time": "10:26:55"} +{"current_steps": 22240, "total_steps": 28254, "loss": 0.7014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.394685992542964e-06, "epoch": 1.57, "percentage": 78.71, "elapsed_time": "1 day, 14:34:30", "remaining_time": "10:25:52"} +{"current_steps": 22250, "total_steps": 28254, "loss": 0.7109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.377449814751304e-06, "epoch": 1.57, "percentage": 78.75, "elapsed_time": "1 day, 14:35:34", "remaining_time": "10:24:50"} +{"current_steps": 22260, "total_steps": 28254, "loss": 0.7328, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.3602378972094e-06, "epoch": 1.58, "percentage": 78.79, "elapsed_time": "1 day, 14:36:36", "remaining_time": "10:23:48"} +{"current_steps": 22270, "total_steps": 28254, "loss": 0.6915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.343050261197116e-06, "epoch": 1.58, "percentage": 78.82, "elapsed_time": "1 day, 14:37:40", "remaining_time": "10:22:45"} +{"current_steps": 22280, "total_steps": 28254, "loss": 0.6845, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.325886927964319e-06, "epoch": 1.58, "percentage": 78.86, "elapsed_time": "1 day, 14:38:43", "remaining_time": "10:21:43"} +{"current_steps": 22290, "total_steps": 28254, "loss": 0.7038, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.308747918730806e-06, "epoch": 1.58, "percentage": 78.89, "elapsed_time": "1 day, 14:39:44", "remaining_time": "10:20:40"} +{"current_steps": 22300, "total_steps": 28254, "loss": 0.6908, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.29163325468632e-06, "epoch": 1.58, "percentage": 78.93, "elapsed_time": "1 day, 14:40:45", "remaining_time": "10:19:38"} +{"current_steps": 22310, "total_steps": 28254, "loss": 0.7001, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.274542956990491e-06, "epoch": 1.58, "percentage": 78.96, "elapsed_time": "1 day, 14:41:46", "remaining_time": "10:18:34"} +{"current_steps": 22320, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.257477046772844e-06, "epoch": 1.58, "percentage": 79.0, "elapsed_time": "1 day, 14:42:45", "remaining_time": "10:17:31"} +{"current_steps": 22330, "total_steps": 28254, "loss": 0.705, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.240435545132716e-06, "epoch": 1.58, "percentage": 79.03, "elapsed_time": "1 day, 14:43:49", "remaining_time": "10:16:29"} +{"current_steps": 22340, "total_steps": 28254, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.22341847313931e-06, "epoch": 1.58, "percentage": 79.07, "elapsed_time": "1 day, 14:44:51", "remaining_time": "10:15:27"} +{"current_steps": 22350, "total_steps": 28254, "loss": 0.7245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.206425851831592e-06, "epoch": 1.58, "percentage": 79.1, "elapsed_time": "1 day, 14:45:53", "remaining_time": "10:14:24"} +{"current_steps": 22360, "total_steps": 28254, "loss": 0.7323, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.18945770221832e-06, "epoch": 1.58, "percentage": 79.14, "elapsed_time": "1 day, 14:46:54", "remaining_time": "10:13:21"} +{"current_steps": 22370, "total_steps": 28254, "loss": 0.7015, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.172514045277979e-06, "epoch": 1.58, "percentage": 79.17, "elapsed_time": "1 day, 14:47:55", "remaining_time": "10:12:18"} +{"current_steps": 22380, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.155594901958791e-06, "epoch": 1.58, "percentage": 79.21, "elapsed_time": "1 day, 14:48:57", "remaining_time": "10:11:16"} +{"current_steps": 22390, "total_steps": 28254, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.13870029317865e-06, "epoch": 1.58, "percentage": 79.25, "elapsed_time": "1 day, 14:50:00", "remaining_time": "10:10:14"} +{"current_steps": 22400, "total_steps": 28254, "loss": 0.7217, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.12183023982514e-06, "epoch": 1.59, "percentage": 79.28, "elapsed_time": "1 day, 14:51:01", "remaining_time": "10:09:11"} +{"current_steps": 22410, "total_steps": 28254, "loss": 0.6898, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.1049847627554634e-06, "epoch": 1.59, "percentage": 79.32, "elapsed_time": "1 day, 14:52:02", "remaining_time": "10:08:08"} +{"current_steps": 22420, "total_steps": 28254, "loss": 0.699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.088163882796448e-06, "epoch": 1.59, "percentage": 79.35, "elapsed_time": "1 day, 14:53:05", "remaining_time": "10:07:06"} +{"current_steps": 22430, "total_steps": 28254, "loss": 0.7336, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.071367620744527e-06, "epoch": 1.59, "percentage": 79.39, "elapsed_time": "1 day, 14:54:08", "remaining_time": "10:06:03"} +{"current_steps": 22440, "total_steps": 28254, "loss": 0.7309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.054595997365671e-06, "epoch": 1.59, "percentage": 79.42, "elapsed_time": "1 day, 14:55:14", "remaining_time": "10:05:02"} +{"current_steps": 22450, "total_steps": 28254, "loss": 0.6978, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.037849033395392e-06, "epoch": 1.59, "percentage": 79.46, "elapsed_time": "1 day, 14:56:16", "remaining_time": "10:03:59"} +{"current_steps": 22460, "total_steps": 28254, "loss": 0.7039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.0211267495387295e-06, "epoch": 1.59, "percentage": 79.49, "elapsed_time": "1 day, 14:57:18", "remaining_time": "10:02:57"} +{"current_steps": 22470, "total_steps": 28254, "loss": 0.7153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.004429166470209e-06, "epoch": 1.59, "percentage": 79.53, "elapsed_time": "1 day, 14:58:19", "remaining_time": "10:01:54"} +{"current_steps": 22480, "total_steps": 28254, "loss": 0.6851, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.987756304833796e-06, "epoch": 1.59, "percentage": 79.56, "elapsed_time": "1 day, 14:59:22", "remaining_time": "10:00:52"} +{"current_steps": 22490, "total_steps": 28254, "loss": 0.7255, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.972771883223115e-06, "epoch": 1.59, "percentage": 79.6, "elapsed_time": "1 day, 15:00:25", "remaining_time": "9:59:49"} +{"current_steps": 22500, "total_steps": 28254, "loss": 0.7188, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.956146049072402e-06, "epoch": 1.59, "percentage": 79.63, "elapsed_time": "1 day, 15:01:28", "remaining_time": "9:58:47"} +{"current_steps": 22510, "total_steps": 28254, "loss": 0.7236, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.939544996048415e-06, "epoch": 1.59, "percentage": 79.67, "elapsed_time": "1 day, 15:02:31", "remaining_time": "9:57:45"} +{"current_steps": 22520, "total_steps": 28254, "loss": 0.7312, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.922968744675788e-06, "epoch": 1.59, "percentage": 79.71, "elapsed_time": "1 day, 15:03:36", "remaining_time": "9:56:43"} +{"current_steps": 22530, "total_steps": 28254, "loss": 0.7279, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.9064173154485086e-06, "epoch": 1.59, "percentage": 79.74, "elapsed_time": "1 day, 15:04:41", "remaining_time": "9:55:41"} +{"current_steps": 22540, "total_steps": 28254, "loss": 0.6995, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.889890728829832e-06, "epoch": 1.6, "percentage": 79.78, "elapsed_time": "1 day, 15:05:44", "remaining_time": "9:54:39"} +{"current_steps": 22550, "total_steps": 28254, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.8733890052523434e-06, "epoch": 1.6, "percentage": 79.81, "elapsed_time": "1 day, 15:06:46", "remaining_time": "9:53:36"} +{"current_steps": 22560, "total_steps": 28254, "loss": 0.6899, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.856912165117871e-06, "epoch": 1.6, "percentage": 79.85, "elapsed_time": "1 day, 15:07:50", "remaining_time": "9:52:34"} +{"current_steps": 22570, "total_steps": 28254, "loss": 0.698, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.840460228797489e-06, "epoch": 1.6, "percentage": 79.88, "elapsed_time": "1 day, 15:08:54", "remaining_time": "9:51:32"} +{"current_steps": 22580, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.824033216631463e-06, "epoch": 1.6, "percentage": 79.92, "elapsed_time": "1 day, 15:09:56", "remaining_time": "9:50:30"} +{"current_steps": 22590, "total_steps": 28254, "loss": 0.718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.807631148929248e-06, "epoch": 1.6, "percentage": 79.95, "elapsed_time": "1 day, 15:11:01", "remaining_time": "9:49:28"} +{"current_steps": 22600, "total_steps": 28254, "loss": 0.7047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.791254045969476e-06, "epoch": 1.6, "percentage": 79.99, "elapsed_time": "1 day, 15:12:05", "remaining_time": "9:48:26"} +{"current_steps": 22610, "total_steps": 28254, "loss": 0.7076, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.774901927999906e-06, "epoch": 1.6, "percentage": 80.02, "elapsed_time": "1 day, 15:13:07", "remaining_time": "9:47:23"} +{"current_steps": 22620, "total_steps": 28254, "loss": 0.7187, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.758574815237396e-06, "epoch": 1.6, "percentage": 80.06, "elapsed_time": "1 day, 15:14:08", "remaining_time": "9:46:21"} +{"current_steps": 22630, "total_steps": 28254, "loss": 0.7161, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.742272727867894e-06, "epoch": 1.6, "percentage": 80.09, "elapsed_time": "1 day, 15:15:12", "remaining_time": "9:45:18"} +{"current_steps": 22640, "total_steps": 28254, "loss": 0.7227, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7259956860464165e-06, "epoch": 1.6, "percentage": 80.13, "elapsed_time": "1 day, 15:16:14", "remaining_time": "9:44:16"} +{"current_steps": 22650, "total_steps": 28254, "loss": 0.7202, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.711367778983819e-06, "epoch": 1.6, "percentage": 80.17, "elapsed_time": "1 day, 15:17:17", "remaining_time": "9:43:14"} +{"current_steps": 22660, "total_steps": 28254, "loss": 0.7038, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.695138379119721e-06, "epoch": 1.6, "percentage": 80.2, "elapsed_time": "1 day, 15:18:21", "remaining_time": "9:42:12"} +{"current_steps": 22670, "total_steps": 28254, "loss": 0.7102, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.678934083077979e-06, "epoch": 1.6, "percentage": 80.24, "elapsed_time": "1 day, 15:19:25", "remaining_time": "9:41:09"} +{"current_steps": 22680, "total_steps": 28254, "loss": 0.6974, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.662754910892711e-06, "epoch": 1.61, "percentage": 80.27, "elapsed_time": "1 day, 15:20:29", "remaining_time": "9:40:07"} +{"current_steps": 22690, "total_steps": 28254, "loss": 0.6962, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.646600882566954e-06, "epoch": 1.61, "percentage": 80.31, "elapsed_time": "1 day, 15:21:33", "remaining_time": "9:39:05"} +{"current_steps": 22700, "total_steps": 28254, "loss": 0.6789, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.630472018072659e-06, "epoch": 1.61, "percentage": 80.34, "elapsed_time": "1 day, 15:22:38", "remaining_time": "9:38:03"} +{"current_steps": 22710, "total_steps": 28254, "loss": 0.7192, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.614368337350686e-06, "epoch": 1.61, "percentage": 80.38, "elapsed_time": "1 day, 15:23:40", "remaining_time": "9:37:01"} +{"current_steps": 22720, "total_steps": 28254, "loss": 0.6817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.598289860310745e-06, "epoch": 1.61, "percentage": 80.41, "elapsed_time": "1 day, 15:24:42", "remaining_time": "9:35:58"} +{"current_steps": 22730, "total_steps": 28254, "loss": 0.7246, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.582236606831378e-06, "epoch": 1.61, "percentage": 80.45, "elapsed_time": "1 day, 15:25:42", "remaining_time": "9:34:55"} +{"current_steps": 22740, "total_steps": 28254, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.566208596759963e-06, "epoch": 1.61, "percentage": 80.48, "elapsed_time": "1 day, 15:26:45", "remaining_time": "9:33:53"} +{"current_steps": 22750, "total_steps": 28254, "loss": 0.691, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.550205849912648e-06, "epoch": 1.61, "percentage": 80.52, "elapsed_time": "1 day, 15:27:49", "remaining_time": "9:32:51"} +{"current_steps": 22760, "total_steps": 28254, "loss": 0.7319, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.534228386074363e-06, "epoch": 1.61, "percentage": 80.55, "elapsed_time": "1 day, 15:28:53", "remaining_time": "9:31:49"} +{"current_steps": 22770, "total_steps": 28254, "loss": 0.7048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.51827622499876e-06, "epoch": 1.61, "percentage": 80.59, "elapsed_time": "1 day, 15:29:57", "remaining_time": "9:30:47"} +{"current_steps": 22780, "total_steps": 28254, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.502349386408236e-06, "epoch": 1.61, "percentage": 80.63, "elapsed_time": "1 day, 15:31:02", "remaining_time": "9:29:45"} +{"current_steps": 22790, "total_steps": 28254, "loss": 0.6948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.48644788999385e-06, "epoch": 1.61, "percentage": 80.66, "elapsed_time": "1 day, 15:32:04", "remaining_time": "9:28:42"} +{"current_steps": 22800, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.470571755415354e-06, "epoch": 1.61, "percentage": 80.7, "elapsed_time": "1 day, 15:33:05", "remaining_time": "9:27:40"} +{"current_steps": 22810, "total_steps": 28254, "loss": 0.7407, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.454721002301127e-06, "epoch": 1.61, "percentage": 80.73, "elapsed_time": "1 day, 15:34:07", "remaining_time": "9:26:37"} +{"current_steps": 22820, "total_steps": 28254, "loss": 0.7064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.438895650248184e-06, "epoch": 1.62, "percentage": 80.77, "elapsed_time": "1 day, 15:35:10", "remaining_time": "9:25:35"} +{"current_steps": 22830, "total_steps": 28254, "loss": 0.6924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.423095718822112e-06, "epoch": 1.62, "percentage": 80.8, "elapsed_time": "1 day, 15:36:13", "remaining_time": "9:24:32"} +{"current_steps": 22840, "total_steps": 28254, "loss": 0.7243, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.4073212275570954e-06, "epoch": 1.62, "percentage": 80.84, "elapsed_time": "1 day, 15:37:16", "remaining_time": "9:23:30"} +{"current_steps": 22850, "total_steps": 28254, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3915721959558534e-06, "epoch": 1.62, "percentage": 80.87, "elapsed_time": "1 day, 15:38:21", "remaining_time": "9:22:28"} +{"current_steps": 22860, "total_steps": 28254, "loss": 0.7117, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.37584864348963e-06, "epoch": 1.62, "percentage": 80.91, "elapsed_time": "1 day, 15:39:22", "remaining_time": "9:21:25"} +{"current_steps": 22870, "total_steps": 28254, "loss": 0.692, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.360150589598156e-06, "epoch": 1.62, "percentage": 80.94, "elapsed_time": "1 day, 15:40:26", "remaining_time": "9:20:23"} +{"current_steps": 22880, "total_steps": 28254, "loss": 0.7245, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.344478053689652e-06, "epoch": 1.62, "percentage": 80.98, "elapsed_time": "1 day, 15:41:27", "remaining_time": "9:19:21"} +{"current_steps": 22890, "total_steps": 28254, "loss": 0.7022, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.328831055140798e-06, "epoch": 1.62, "percentage": 81.02, "elapsed_time": "1 day, 15:42:30", "remaining_time": "9:18:18"} +{"current_steps": 22900, "total_steps": 28254, "loss": 0.7265, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.313209613296679e-06, "epoch": 1.62, "percentage": 81.05, "elapsed_time": "1 day, 15:43:33", "remaining_time": "9:17:16"} +{"current_steps": 22910, "total_steps": 28254, "loss": 0.7039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.297613747470789e-06, "epoch": 1.62, "percentage": 81.09, "elapsed_time": "1 day, 15:44:34", "remaining_time": "9:16:13"} +{"current_steps": 22920, "total_steps": 28254, "loss": 0.6811, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.282043476945008e-06, "epoch": 1.62, "percentage": 81.12, "elapsed_time": "1 day, 15:45:35", "remaining_time": "9:15:10"} +{"current_steps": 22930, "total_steps": 28254, "loss": 0.6649, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2664988209695775e-06, "epoch": 1.62, "percentage": 81.16, "elapsed_time": "1 day, 15:46:41", "remaining_time": "9:14:09"} +{"current_steps": 22940, "total_steps": 28254, "loss": 0.6998, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.250979798763052e-06, "epoch": 1.62, "percentage": 81.19, "elapsed_time": "1 day, 15:47:43", "remaining_time": "9:13:06"} +{"current_steps": 22950, "total_steps": 28254, "loss": 0.7433, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2354864295123e-06, "epoch": 1.62, "percentage": 81.23, "elapsed_time": "1 day, 15:48:47", "remaining_time": "9:12:04"} +{"current_steps": 22960, "total_steps": 28254, "loss": 0.7184, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.220018732372485e-06, "epoch": 1.63, "percentage": 81.26, "elapsed_time": "1 day, 15:49:50", "remaining_time": "9:11:02"} +{"current_steps": 22970, "total_steps": 28254, "loss": 0.7101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.204576726467027e-06, "epoch": 1.63, "percentage": 81.3, "elapsed_time": "1 day, 15:50:53", "remaining_time": "9:09:59"} +{"current_steps": 22980, "total_steps": 28254, "loss": 0.7722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1891604308875706e-06, "epoch": 1.63, "percentage": 81.33, "elapsed_time": "1 day, 15:51:55", "remaining_time": "9:08:57"} +{"current_steps": 22990, "total_steps": 28254, "loss": 0.7269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.17376986469398e-06, "epoch": 1.63, "percentage": 81.37, "elapsed_time": "1 day, 15:53:00", "remaining_time": "9:07:55"} +{"current_steps": 23000, "total_steps": 28254, "loss": 0.6903, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.158405046914315e-06, "epoch": 1.63, "percentage": 81.4, "elapsed_time": "1 day, 15:54:02", "remaining_time": "9:06:52"} +{"current_steps": 23010, "total_steps": 28254, "loss": 0.7359, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.143065996544804e-06, "epoch": 1.63, "percentage": 81.44, "elapsed_time": "1 day, 15:55:03", "remaining_time": "9:05:50"} +{"current_steps": 23020, "total_steps": 28254, "loss": 0.6894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1277527325498e-06, "epoch": 1.63, "percentage": 81.48, "elapsed_time": "1 day, 15:56:04", "remaining_time": "9:04:47"} +{"current_steps": 23030, "total_steps": 28254, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.112465273861799e-06, "epoch": 1.63, "percentage": 81.51, "elapsed_time": "1 day, 15:57:06", "remaining_time": "9:03:44"} +{"current_steps": 23040, "total_steps": 28254, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.097203639381364e-06, "epoch": 1.63, "percentage": 81.55, "elapsed_time": "1 day, 15:58:09", "remaining_time": "9:02:42"} +{"current_steps": 23050, "total_steps": 28254, "loss": 0.7038, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.081967847977164e-06, "epoch": 1.63, "percentage": 81.58, "elapsed_time": "1 day, 15:59:11", "remaining_time": "9:01:39"} +{"current_steps": 23060, "total_steps": 28254, "loss": 0.711, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.066757918485886e-06, "epoch": 1.63, "percentage": 81.62, "elapsed_time": "1 day, 16:00:14", "remaining_time": "9:00:37"} +{"current_steps": 23070, "total_steps": 28254, "loss": 0.717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.0515738697122694e-06, "epoch": 1.63, "percentage": 81.65, "elapsed_time": "1 day, 16:01:15", "remaining_time": "8:59:34"} +{"current_steps": 23080, "total_steps": 28254, "loss": 0.7134, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.036415720429027e-06, "epoch": 1.63, "percentage": 81.69, "elapsed_time": "1 day, 16:02:17", "remaining_time": "8:58:32"} +{"current_steps": 23090, "total_steps": 28254, "loss": 0.709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.021283489376876e-06, "epoch": 1.63, "percentage": 81.72, "elapsed_time": "1 day, 16:03:18", "remaining_time": "8:57:29"} +{"current_steps": 23100, "total_steps": 28254, "loss": 0.7266, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.006177195264488e-06, "epoch": 1.64, "percentage": 81.76, "elapsed_time": "1 day, 16:04:21", "remaining_time": "8:56:27"} +{"current_steps": 23110, "total_steps": 28254, "loss": 0.6872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.9910968567684506e-06, "epoch": 1.64, "percentage": 81.79, "elapsed_time": "1 day, 16:05:23", "remaining_time": "8:55:24"} +{"current_steps": 23120, "total_steps": 28254, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.976042492533269e-06, "epoch": 1.64, "percentage": 81.83, "elapsed_time": "1 day, 16:06:25", "remaining_time": "8:54:22"} +{"current_steps": 23130, "total_steps": 28254, "loss": 0.7437, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.961014121171342e-06, "epoch": 1.64, "percentage": 81.86, "elapsed_time": "1 day, 16:07:27", "remaining_time": "8:53:19"} +{"current_steps": 23140, "total_steps": 28254, "loss": 0.7111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.946011761262932e-06, "epoch": 1.64, "percentage": 81.9, "elapsed_time": "1 day, 16:08:28", "remaining_time": "8:52:16"} +{"current_steps": 23150, "total_steps": 28254, "loss": 0.697, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.931035431356134e-06, "epoch": 1.64, "percentage": 81.94, "elapsed_time": "1 day, 16:09:31", "remaining_time": "8:51:14"} +{"current_steps": 23160, "total_steps": 28254, "loss": 0.7258, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.916085149966856e-06, "epoch": 1.64, "percentage": 81.97, "elapsed_time": "1 day, 16:10:33", "remaining_time": "8:50:11"} +{"current_steps": 23170, "total_steps": 28254, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.901160935578815e-06, "epoch": 1.64, "percentage": 82.01, "elapsed_time": "1 day, 16:11:36", "remaining_time": "8:49:09"} +{"current_steps": 23180, "total_steps": 28254, "loss": 0.686, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8862628066435065e-06, "epoch": 1.64, "percentage": 82.04, "elapsed_time": "1 day, 16:12:41", "remaining_time": "8:48:07"} +{"current_steps": 23190, "total_steps": 28254, "loss": 0.6994, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8713907815801534e-06, "epoch": 1.64, "percentage": 82.08, "elapsed_time": "1 day, 16:13:42", "remaining_time": "8:47:04"} +{"current_steps": 23200, "total_steps": 28254, "loss": 0.7039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.856544878775708e-06, "epoch": 1.64, "percentage": 82.11, "elapsed_time": "1 day, 16:14:43", "remaining_time": "8:46:02"} +{"current_steps": 23210, "total_steps": 28254, "loss": 0.7096, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.841725116584846e-06, "epoch": 1.64, "percentage": 82.15, "elapsed_time": "1 day, 16:15:46", "remaining_time": "8:44:59"} +{"current_steps": 23220, "total_steps": 28254, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8269315133299126e-06, "epoch": 1.64, "percentage": 82.18, "elapsed_time": "1 day, 16:16:47", "remaining_time": "8:43:57"} +{"current_steps": 23230, "total_steps": 28254, "loss": 0.7133, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8121640873009067e-06, "epoch": 1.64, "percentage": 82.22, "elapsed_time": "1 day, 16:17:50", "remaining_time": "8:42:54"} +{"current_steps": 23240, "total_steps": 28254, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7974228567554617e-06, "epoch": 1.64, "percentage": 82.25, "elapsed_time": "1 day, 16:18:54", "remaining_time": "8:41:52"} +{"current_steps": 23250, "total_steps": 28254, "loss": 0.7077, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7827078399188393e-06, "epoch": 1.65, "percentage": 82.29, "elapsed_time": "1 day, 16:19:54", "remaining_time": "8:40:49"} +{"current_steps": 23260, "total_steps": 28254, "loss": 0.6985, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7680190549838847e-06, "epoch": 1.65, "percentage": 82.32, "elapsed_time": "1 day, 16:20:57", "remaining_time": "8:39:47"} +{"current_steps": 23270, "total_steps": 28254, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.753356520111004e-06, "epoch": 1.65, "percentage": 82.36, "elapsed_time": "1 day, 16:21:59", "remaining_time": "8:38:44"} +{"current_steps": 23280, "total_steps": 28254, "loss": 0.7174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.738720253428152e-06, "epoch": 1.65, "percentage": 82.4, "elapsed_time": "1 day, 16:23:03", "remaining_time": "8:37:42"} +{"current_steps": 23290, "total_steps": 28254, "loss": 0.6935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.724110273030812e-06, "epoch": 1.65, "percentage": 82.43, "elapsed_time": "1 day, 16:24:05", "remaining_time": "8:36:40"} +{"current_steps": 23300, "total_steps": 28254, "loss": 0.7094, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.709526596981977e-06, "epoch": 1.65, "percentage": 82.47, "elapsed_time": "1 day, 16:25:09", "remaining_time": "8:35:38"} +{"current_steps": 23310, "total_steps": 28254, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6949692433120907e-06, "epoch": 1.65, "percentage": 82.5, "elapsed_time": "1 day, 16:26:14", "remaining_time": "8:34:36"} +{"current_steps": 23320, "total_steps": 28254, "loss": 0.7145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6804382300190844e-06, "epoch": 1.65, "percentage": 82.54, "elapsed_time": "1 day, 16:27:16", "remaining_time": "8:33:33"} +{"current_steps": 23330, "total_steps": 28254, "loss": 0.7247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.665933575068298e-06, "epoch": 1.65, "percentage": 82.57, "elapsed_time": "1 day, 16:28:18", "remaining_time": "8:32:30"} +{"current_steps": 23340, "total_steps": 28254, "loss": 0.7393, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6514552963925004e-06, "epoch": 1.65, "percentage": 82.61, "elapsed_time": "1 day, 16:29:20", "remaining_time": "8:31:28"} +{"current_steps": 23350, "total_steps": 28254, "loss": 0.7352, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.637003411891854e-06, "epoch": 1.65, "percentage": 82.64, "elapsed_time": "1 day, 16:30:22", "remaining_time": "8:30:25"} +{"current_steps": 23360, "total_steps": 28254, "loss": 0.6873, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.622577939433866e-06, "epoch": 1.65, "percentage": 82.68, "elapsed_time": "1 day, 16:31:24", "remaining_time": "8:29:23"} +{"current_steps": 23370, "total_steps": 28254, "loss": 0.7056, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.6081788968534066e-06, "epoch": 1.65, "percentage": 82.71, "elapsed_time": "1 day, 16:32:28", "remaining_time": "8:28:21"} +{"current_steps": 23380, "total_steps": 28254, "loss": 0.7287, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5938063019526653e-06, "epoch": 1.65, "percentage": 82.75, "elapsed_time": "1 day, 16:33:27", "remaining_time": "8:27:18"} +{"current_steps": 23390, "total_steps": 28254, "loss": 0.717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.579460172501142e-06, "epoch": 1.66, "percentage": 82.78, "elapsed_time": "1 day, 16:34:30", "remaining_time": "8:26:15"} +{"current_steps": 23400, "total_steps": 28254, "loss": 0.7258, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5651405262356024e-06, "epoch": 1.66, "percentage": 82.82, "elapsed_time": "1 day, 16:35:34", "remaining_time": "8:25:13"} +{"current_steps": 23410, "total_steps": 28254, "loss": 0.6985, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5508473808600674e-06, "epoch": 1.66, "percentage": 82.86, "elapsed_time": "1 day, 16:36:37", "remaining_time": "8:24:11"} +{"current_steps": 23420, "total_steps": 28254, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5365807540458097e-06, "epoch": 1.66, "percentage": 82.89, "elapsed_time": "1 day, 16:37:38", "remaining_time": "8:23:08"} +{"current_steps": 23430, "total_steps": 28254, "loss": 0.7047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.522340663431314e-06, "epoch": 1.66, "percentage": 82.93, "elapsed_time": "1 day, 16:38:41", "remaining_time": "8:22:06"} +{"current_steps": 23440, "total_steps": 28254, "loss": 0.7064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5081271266222434e-06, "epoch": 1.66, "percentage": 82.96, "elapsed_time": "1 day, 16:39:44", "remaining_time": "8:21:03"} +{"current_steps": 23450, "total_steps": 28254, "loss": 0.6804, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4939401611914337e-06, "epoch": 1.66, "percentage": 83.0, "elapsed_time": "1 day, 16:40:46", "remaining_time": "8:20:01"} +{"current_steps": 23460, "total_steps": 28254, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.479779784678877e-06, "epoch": 1.66, "percentage": 83.03, "elapsed_time": "1 day, 16:41:47", "remaining_time": "8:18:58"} +{"current_steps": 23470, "total_steps": 28254, "loss": 0.7182, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.465646014591703e-06, "epoch": 1.66, "percentage": 83.07, "elapsed_time": "1 day, 16:42:51", "remaining_time": "8:17:56"} +{"current_steps": 23480, "total_steps": 28254, "loss": 0.6964, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4515388684041193e-06, "epoch": 1.66, "percentage": 83.1, "elapsed_time": "1 day, 16:43:54", "remaining_time": "8:16:53"} +{"current_steps": 23490, "total_steps": 28254, "loss": 0.707, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.437458363557433e-06, "epoch": 1.66, "percentage": 83.14, "elapsed_time": "1 day, 16:44:56", "remaining_time": "8:15:51"} +{"current_steps": 23500, "total_steps": 28254, "loss": 0.6729, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4234045174600103e-06, "epoch": 1.66, "percentage": 83.17, "elapsed_time": "1 day, 16:46:00", "remaining_time": "8:14:49"} +{"current_steps": 23510, "total_steps": 28254, "loss": 0.6822, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.409377347487272e-06, "epoch": 1.66, "percentage": 83.21, "elapsed_time": "1 day, 16:47:00", "remaining_time": "8:13:46"} +{"current_steps": 23520, "total_steps": 28254, "loss": 0.7339, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.395376870981634e-06, "epoch": 1.66, "percentage": 83.24, "elapsed_time": "1 day, 16:48:03", "remaining_time": "8:12:44"} +{"current_steps": 23530, "total_steps": 28254, "loss": 0.7011, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3814031052525175e-06, "epoch": 1.67, "percentage": 83.28, "elapsed_time": "1 day, 16:49:05", "remaining_time": "8:11:41"} +{"current_steps": 23540, "total_steps": 28254, "loss": 0.7216, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.367456067576327e-06, "epoch": 1.67, "percentage": 83.32, "elapsed_time": "1 day, 16:50:08", "remaining_time": "8:10:39"} +{"current_steps": 23550, "total_steps": 28254, "loss": 0.7194, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.353535775196423e-06, "epoch": 1.67, "percentage": 83.35, "elapsed_time": "1 day, 16:51:10", "remaining_time": "8:09:36"} +{"current_steps": 23560, "total_steps": 28254, "loss": 0.7163, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.339642245323102e-06, "epoch": 1.67, "percentage": 83.39, "elapsed_time": "1 day, 16:52:14", "remaining_time": "8:08:34"} +{"current_steps": 23570, "total_steps": 28254, "loss": 0.736, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.325775495133546e-06, "epoch": 1.67, "percentage": 83.42, "elapsed_time": "1 day, 16:53:15", "remaining_time": "8:07:31"} +{"current_steps": 23580, "total_steps": 28254, "loss": 0.7071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.31193554177186e-06, "epoch": 1.67, "percentage": 83.46, "elapsed_time": "1 day, 16:54:17", "remaining_time": "8:06:29"} +{"current_steps": 23590, "total_steps": 28254, "loss": 0.6889, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.298122402349002e-06, "epoch": 1.67, "percentage": 83.49, "elapsed_time": "1 day, 16:55:21", "remaining_time": "8:05:26"} +{"current_steps": 23600, "total_steps": 28254, "loss": 0.6933, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2843360939427943e-06, "epoch": 1.67, "percentage": 83.53, "elapsed_time": "1 day, 16:56:23", "remaining_time": "8:04:24"} +{"current_steps": 23610, "total_steps": 28254, "loss": 0.699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.270576633597866e-06, "epoch": 1.67, "percentage": 83.56, "elapsed_time": "1 day, 16:57:26", "remaining_time": "8:03:22"} +{"current_steps": 23620, "total_steps": 28254, "loss": 0.6603, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2568440383256598e-06, "epoch": 1.67, "percentage": 83.6, "elapsed_time": "1 day, 16:58:28", "remaining_time": "8:02:19"} +{"current_steps": 23630, "total_steps": 28254, "loss": 0.6971, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.243138325104411e-06, "epoch": 1.67, "percentage": 83.63, "elapsed_time": "1 day, 16:59:33", "remaining_time": "8:01:17"} +{"current_steps": 23640, "total_steps": 28254, "loss": 0.7269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.230826181364585e-06, "epoch": 1.67, "percentage": 83.67, "elapsed_time": "1 day, 17:00:35", "remaining_time": "8:00:15"} +{"current_steps": 23650, "total_steps": 28254, "loss": 0.7277, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.217171590696141e-06, "epoch": 1.67, "percentage": 83.7, "elapsed_time": "1 day, 17:01:38", "remaining_time": "7:59:12"} +{"current_steps": 23660, "total_steps": 28254, "loss": 0.6943, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2035439311275006e-06, "epoch": 1.67, "percentage": 83.74, "elapsed_time": "1 day, 17:02:39", "remaining_time": "7:58:10"} +{"current_steps": 23670, "total_steps": 28254, "loss": 0.7204, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1899432195071575e-06, "epoch": 1.68, "percentage": 83.78, "elapsed_time": "1 day, 17:03:42", "remaining_time": "7:57:07"} +{"current_steps": 23680, "total_steps": 28254, "loss": 0.7241, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.176369472650292e-06, "epoch": 1.68, "percentage": 83.81, "elapsed_time": "1 day, 17:04:44", "remaining_time": "7:56:05"} +{"current_steps": 23690, "total_steps": 28254, "loss": 0.6712, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1628227073387474e-06, "epoch": 1.68, "percentage": 83.85, "elapsed_time": "1 day, 17:05:45", "remaining_time": "7:55:02"} +{"current_steps": 23700, "total_steps": 28254, "loss": 0.6877, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1493029403209973e-06, "epoch": 1.68, "percentage": 83.88, "elapsed_time": "1 day, 17:06:45", "remaining_time": "7:53:59"} +{"current_steps": 23710, "total_steps": 28254, "loss": 0.6953, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1358101883121547e-06, "epoch": 1.68, "percentage": 83.92, "elapsed_time": "1 day, 17:07:49", "remaining_time": "7:52:57"} +{"current_steps": 23720, "total_steps": 28254, "loss": 0.6918, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.122344467993907e-06, "epoch": 1.68, "percentage": 83.95, "elapsed_time": "1 day, 17:08:50", "remaining_time": "7:51:54"} +{"current_steps": 23730, "total_steps": 28254, "loss": 0.6866, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1089057960145498e-06, "epoch": 1.68, "percentage": 83.99, "elapsed_time": "1 day, 17:09:55", "remaining_time": "7:50:52"} +{"current_steps": 23740, "total_steps": 28254, "loss": 0.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0954941889889096e-06, "epoch": 1.68, "percentage": 84.02, "elapsed_time": "1 day, 17:10:57", "remaining_time": "7:49:50"} +{"current_steps": 23750, "total_steps": 28254, "loss": 0.7213, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.082109663498378e-06, "epoch": 1.68, "percentage": 84.06, "elapsed_time": "1 day, 17:12:00", "remaining_time": "7:48:47"} +{"current_steps": 23760, "total_steps": 28254, "loss": 0.7225, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.068752236090841e-06, "epoch": 1.68, "percentage": 84.09, "elapsed_time": "1 day, 17:13:00", "remaining_time": "7:47:44"} +{"current_steps": 23770, "total_steps": 28254, "loss": 0.7064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.055421923280702e-06, "epoch": 1.68, "percentage": 84.13, "elapsed_time": "1 day, 17:14:04", "remaining_time": "7:46:42"} +{"current_steps": 23780, "total_steps": 28254, "loss": 0.696, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0421187415488246e-06, "epoch": 1.68, "percentage": 84.17, "elapsed_time": "1 day, 17:15:06", "remaining_time": "7:45:40"} +{"current_steps": 23790, "total_steps": 28254, "loss": 0.7251, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.028842707342541e-06, "epoch": 1.68, "percentage": 84.2, "elapsed_time": "1 day, 17:16:08", "remaining_time": "7:44:37"} +{"current_steps": 23800, "total_steps": 28254, "loss": 0.7075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0155938370756116e-06, "epoch": 1.68, "percentage": 84.24, "elapsed_time": "1 day, 17:17:10", "remaining_time": "7:43:35"} +{"current_steps": 23810, "total_steps": 28254, "loss": 0.7181, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0023721471282214e-06, "epoch": 1.69, "percentage": 84.27, "elapsed_time": "1 day, 17:18:13", "remaining_time": "7:42:32"} +{"current_steps": 23820, "total_steps": 28254, "loss": 0.6982, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9891776538469362e-06, "epoch": 1.69, "percentage": 84.31, "elapsed_time": "1 day, 17:19:15", "remaining_time": "7:41:30"} +{"current_steps": 23830, "total_steps": 28254, "loss": 0.6984, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9760103735447186e-06, "epoch": 1.69, "percentage": 84.34, "elapsed_time": "1 day, 17:20:18", "remaining_time": "7:40:27"} +{"current_steps": 23840, "total_steps": 28254, "loss": 0.7363, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.962870322500866e-06, "epoch": 1.69, "percentage": 84.38, "elapsed_time": "1 day, 17:21:20", "remaining_time": "7:39:25"} +{"current_steps": 23850, "total_steps": 28254, "loss": 0.6743, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9497575169610243e-06, "epoch": 1.69, "percentage": 84.41, "elapsed_time": "1 day, 17:22:23", "remaining_time": "7:38:22"} +{"current_steps": 23860, "total_steps": 28254, "loss": 0.7141, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9366719731371563e-06, "epoch": 1.69, "percentage": 84.45, "elapsed_time": "1 day, 17:23:23", "remaining_time": "7:37:20"} +{"current_steps": 23870, "total_steps": 28254, "loss": 0.7228, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9236137072075067e-06, "epoch": 1.69, "percentage": 84.48, "elapsed_time": "1 day, 17:24:25", "remaining_time": "7:36:17"} +{"current_steps": 23880, "total_steps": 28254, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.910582735316597e-06, "epoch": 1.69, "percentage": 84.52, "elapsed_time": "1 day, 17:25:26", "remaining_time": "7:35:14"} +{"current_steps": 23890, "total_steps": 28254, "loss": 0.7098, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8975790735752186e-06, "epoch": 1.69, "percentage": 84.55, "elapsed_time": "1 day, 17:26:29", "remaining_time": "7:34:12"} +{"current_steps": 23900, "total_steps": 28254, "loss": 0.6907, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8846027380603908e-06, "epoch": 1.69, "percentage": 84.59, "elapsed_time": "1 day, 17:27:31", "remaining_time": "7:33:09"} +{"current_steps": 23910, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8716537448153364e-06, "epoch": 1.69, "percentage": 84.63, "elapsed_time": "1 day, 17:28:33", "remaining_time": "7:32:07"} +{"current_steps": 23920, "total_steps": 28254, "loss": 0.7298, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8587321098494963e-06, "epoch": 1.69, "percentage": 84.66, "elapsed_time": "1 day, 17:29:35", "remaining_time": "7:31:05"} +{"current_steps": 23930, "total_steps": 28254, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8458378491384606e-06, "epoch": 1.69, "percentage": 84.7, "elapsed_time": "1 day, 17:30:38", "remaining_time": "7:30:02"} +{"current_steps": 23940, "total_steps": 28254, "loss": 0.7065, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.832970978624e-06, "epoch": 1.69, "percentage": 84.73, "elapsed_time": "1 day, 17:31:42", "remaining_time": "7:29:00"} +{"current_steps": 23950, "total_steps": 28254, "loss": 0.6787, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8201315142140055e-06, "epoch": 1.7, "percentage": 84.77, "elapsed_time": "1 day, 17:32:45", "remaining_time": "7:27:58"} +{"current_steps": 23960, "total_steps": 28254, "loss": 0.6846, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8073194717824935e-06, "epoch": 1.7, "percentage": 84.8, "elapsed_time": "1 day, 17:33:47", "remaining_time": "7:26:55"} +{"current_steps": 23970, "total_steps": 28254, "loss": 0.7014, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.794534867169568e-06, "epoch": 1.7, "percentage": 84.84, "elapsed_time": "1 day, 17:34:52", "remaining_time": "7:25:53"} +{"current_steps": 23980, "total_steps": 28254, "loss": 0.721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7817777161814245e-06, "epoch": 1.7, "percentage": 84.87, "elapsed_time": "1 day, 17:35:54", "remaining_time": "7:24:50"} +{"current_steps": 23990, "total_steps": 28254, "loss": 0.7459, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.769048034590299e-06, "epoch": 1.7, "percentage": 84.91, "elapsed_time": "1 day, 17:36:59", "remaining_time": "7:23:49"} +{"current_steps": 24000, "total_steps": 28254, "loss": 0.6801, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7563458381344858e-06, "epoch": 1.7, "percentage": 84.94, "elapsed_time": "1 day, 17:38:02", "remaining_time": "7:22:46"} +{"current_steps": 24010, "total_steps": 28254, "loss": 0.7142, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7436711425182726e-06, "epoch": 1.7, "percentage": 84.98, "elapsed_time": "1 day, 17:39:03", "remaining_time": "7:21:43"} +{"current_steps": 24020, "total_steps": 28254, "loss": 0.6988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7310239634119744e-06, "epoch": 1.7, "percentage": 85.01, "elapsed_time": "1 day, 17:40:08", "remaining_time": "7:20:41"} +{"current_steps": 24030, "total_steps": 28254, "loss": 0.7297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.718404316451864e-06, "epoch": 1.7, "percentage": 85.05, "elapsed_time": "1 day, 17:41:11", "remaining_time": "7:19:39"} +{"current_steps": 24040, "total_steps": 28254, "loss": 0.672, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7058122172401916e-06, "epoch": 1.7, "percentage": 85.09, "elapsed_time": "1 day, 17:42:14", "remaining_time": "7:18:37"} +{"current_steps": 24050, "total_steps": 28254, "loss": 0.7278, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.693247681345132e-06, "epoch": 1.7, "percentage": 85.12, "elapsed_time": "1 day, 17:43:18", "remaining_time": "7:17:35"} +{"current_steps": 24060, "total_steps": 28254, "loss": 0.7055, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.680710724300803e-06, "epoch": 1.7, "percentage": 85.16, "elapsed_time": "1 day, 17:44:20", "remaining_time": "7:16:32"} +{"current_steps": 24070, "total_steps": 28254, "loss": 0.7266, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6682013616072005e-06, "epoch": 1.7, "percentage": 85.19, "elapsed_time": "1 day, 17:45:25", "remaining_time": "7:15:30"} +{"current_steps": 24080, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.655719608730231e-06, "epoch": 1.7, "percentage": 85.23, "elapsed_time": "1 day, 17:46:27", "remaining_time": "7:14:28"} +{"current_steps": 24090, "total_steps": 28254, "loss": 0.6943, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6432654811016395e-06, "epoch": 1.71, "percentage": 85.26, "elapsed_time": "1 day, 17:47:33", "remaining_time": "7:13:26"} +{"current_steps": 24100, "total_steps": 28254, "loss": 0.709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.630838994119042e-06, "epoch": 1.71, "percentage": 85.3, "elapsed_time": "1 day, 17:48:37", "remaining_time": "7:12:23"} +{"current_steps": 24110, "total_steps": 28254, "loss": 0.7115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.618440163145855e-06, "epoch": 1.71, "percentage": 85.33, "elapsed_time": "1 day, 17:49:38", "remaining_time": "7:11:21"} +{"current_steps": 24120, "total_steps": 28254, "loss": 0.7056, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.60606900351133e-06, "epoch": 1.71, "percentage": 85.37, "elapsed_time": "1 day, 17:50:40", "remaining_time": "7:10:18"} +{"current_steps": 24130, "total_steps": 28254, "loss": 0.7071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5937255305104825e-06, "epoch": 1.71, "percentage": 85.4, "elapsed_time": "1 day, 17:51:42", "remaining_time": "7:09:16"} +{"current_steps": 24140, "total_steps": 28254, "loss": 0.6953, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.581409759404113e-06, "epoch": 1.71, "percentage": 85.44, "elapsed_time": "1 day, 17:52:45", "remaining_time": "7:08:13"} +{"current_steps": 24150, "total_steps": 28254, "loss": 0.7178, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5691217054187726e-06, "epoch": 1.71, "percentage": 85.47, "elapsed_time": "1 day, 17:53:49", "remaining_time": "7:07:11"} +{"current_steps": 24160, "total_steps": 28254, "loss": 0.7034, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.556861383746731e-06, "epoch": 1.71, "percentage": 85.51, "elapsed_time": "1 day, 17:54:52", "remaining_time": "7:06:09"} +{"current_steps": 24170, "total_steps": 28254, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.54462880954599e-06, "epoch": 1.71, "percentage": 85.55, "elapsed_time": "1 day, 17:55:53", "remaining_time": "7:05:06"} +{"current_steps": 24180, "total_steps": 28254, "loss": 0.6898, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.532423997940231e-06, "epoch": 1.71, "percentage": 85.58, "elapsed_time": "1 day, 17:56:56", "remaining_time": "7:04:04"} +{"current_steps": 24190, "total_steps": 28254, "loss": 0.7106, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5202469640188187e-06, "epoch": 1.71, "percentage": 85.62, "elapsed_time": "1 day, 17:57:56", "remaining_time": "7:03:01"} +{"current_steps": 24200, "total_steps": 28254, "loss": 0.7149, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.508097722836769e-06, "epoch": 1.71, "percentage": 85.65, "elapsed_time": "1 day, 17:59:00", "remaining_time": "7:01:59"} +{"current_steps": 24210, "total_steps": 28254, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4959762894147503e-06, "epoch": 1.71, "percentage": 85.69, "elapsed_time": "1 day, 18:00:03", "remaining_time": "7:00:56"} +{"current_steps": 24220, "total_steps": 28254, "loss": 0.6858, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.48388267873903e-06, "epoch": 1.71, "percentage": 85.72, "elapsed_time": "1 day, 18:01:05", "remaining_time": "6:59:54"} +{"current_steps": 24230, "total_steps": 28254, "loss": 0.6949, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4718169057614953e-06, "epoch": 1.72, "percentage": 85.76, "elapsed_time": "1 day, 18:02:06", "remaining_time": "6:58:51"} +{"current_steps": 24240, "total_steps": 28254, "loss": 0.7374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4597789853996022e-06, "epoch": 1.72, "percentage": 85.79, "elapsed_time": "1 day, 18:03:08", "remaining_time": "6:57:48"} +{"current_steps": 24250, "total_steps": 28254, "loss": 0.6925, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4477689325363875e-06, "epoch": 1.72, "percentage": 85.83, "elapsed_time": "1 day, 18:04:11", "remaining_time": "6:56:46"} +{"current_steps": 24260, "total_steps": 28254, "loss": 0.7254, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4357867620204174e-06, "epoch": 1.72, "percentage": 85.86, "elapsed_time": "1 day, 18:05:16", "remaining_time": "6:55:44"} +{"current_steps": 24270, "total_steps": 28254, "loss": 0.7108, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4238324886657976e-06, "epoch": 1.72, "percentage": 85.9, "elapsed_time": "1 day, 18:06:19", "remaining_time": "6:54:42"} +{"current_steps": 24280, "total_steps": 28254, "loss": 0.7205, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.411906127252134e-06, "epoch": 1.72, "percentage": 85.93, "elapsed_time": "1 day, 18:07:21", "remaining_time": "6:53:39"} +{"current_steps": 24290, "total_steps": 28254, "loss": 0.7125, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4000076925245364e-06, "epoch": 1.72, "percentage": 85.97, "elapsed_time": "1 day, 18:08:22", "remaining_time": "6:52:37"} +{"current_steps": 24300, "total_steps": 28254, "loss": 0.7166, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.388137199193571e-06, "epoch": 1.72, "percentage": 86.01, "elapsed_time": "1 day, 18:09:27", "remaining_time": "6:51:35"} +{"current_steps": 24310, "total_steps": 28254, "loss": 0.7536, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3762946619352773e-06, "epoch": 1.72, "percentage": 86.04, "elapsed_time": "1 day, 18:10:30", "remaining_time": "6:50:32"} +{"current_steps": 24320, "total_steps": 28254, "loss": 0.6741, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.364480095391114e-06, "epoch": 1.72, "percentage": 86.08, "elapsed_time": "1 day, 18:11:33", "remaining_time": "6:49:30"} +{"current_steps": 24330, "total_steps": 28254, "loss": 0.7399, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3526935141679695e-06, "epoch": 1.72, "percentage": 86.11, "elapsed_time": "1 day, 18:12:34", "remaining_time": "6:48:27"} +{"current_steps": 24340, "total_steps": 28254, "loss": 0.6885, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3409349328381264e-06, "epoch": 1.72, "percentage": 86.15, "elapsed_time": "1 day, 18:13:37", "remaining_time": "6:47:25"} +{"current_steps": 24350, "total_steps": 28254, "loss": 0.6654, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.329204365939261e-06, "epoch": 1.72, "percentage": 86.18, "elapsed_time": "1 day, 18:14:39", "remaining_time": "6:46:22"} +{"current_steps": 24360, "total_steps": 28254, "loss": 0.7019, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3175018279743943e-06, "epoch": 1.72, "percentage": 86.22, "elapsed_time": "1 day, 18:15:41", "remaining_time": "6:45:20"} +{"current_steps": 24370, "total_steps": 28254, "loss": 0.6802, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.305827333411914e-06, "epoch": 1.72, "percentage": 86.25, "elapsed_time": "1 day, 18:16:41", "remaining_time": "6:44:17"} +{"current_steps": 24380, "total_steps": 28254, "loss": 0.6845, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2941808966855217e-06, "epoch": 1.73, "percentage": 86.29, "elapsed_time": "1 day, 18:17:45", "remaining_time": "6:43:15"} +{"current_steps": 24390, "total_steps": 28254, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2825625321942433e-06, "epoch": 1.73, "percentage": 86.32, "elapsed_time": "1 day, 18:18:48", "remaining_time": "6:42:12"} +{"current_steps": 24400, "total_steps": 28254, "loss": 0.6803, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.270972254302381e-06, "epoch": 1.73, "percentage": 86.36, "elapsed_time": "1 day, 18:19:53", "remaining_time": "6:41:10"} +{"current_steps": 24410, "total_steps": 28254, "loss": 0.7212, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.259410077339527e-06, "epoch": 1.73, "percentage": 86.39, "elapsed_time": "1 day, 18:20:54", "remaining_time": "6:40:08"} +{"current_steps": 24420, "total_steps": 28254, "loss": 0.7025, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2478760156005278e-06, "epoch": 1.73, "percentage": 86.43, "elapsed_time": "1 day, 18:21:57", "remaining_time": "6:39:05"} +{"current_steps": 24430, "total_steps": 28254, "loss": 0.6774, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2363700833454627e-06, "epoch": 1.73, "percentage": 86.47, "elapsed_time": "1 day, 18:22:59", "remaining_time": "6:38:03"} +{"current_steps": 24440, "total_steps": 28254, "loss": 0.7134, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2248922947996464e-06, "epoch": 1.73, "percentage": 86.5, "elapsed_time": "1 day, 18:24:04", "remaining_time": "6:37:00"} +{"current_steps": 24450, "total_steps": 28254, "loss": 0.71, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2134426641535756e-06, "epoch": 1.73, "percentage": 86.54, "elapsed_time": "1 day, 18:25:06", "remaining_time": "6:35:58"} +{"current_steps": 24460, "total_steps": 28254, "loss": 0.7253, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.202021205562965e-06, "epoch": 1.73, "percentage": 86.57, "elapsed_time": "1 day, 18:26:08", "remaining_time": "6:34:56"} +{"current_steps": 24470, "total_steps": 28254, "loss": 0.7008, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.19062793314867e-06, "epoch": 1.73, "percentage": 86.61, "elapsed_time": "1 day, 18:27:09", "remaining_time": "6:33:53"} +{"current_steps": 24480, "total_steps": 28254, "loss": 0.7299, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.179262860996717e-06, "epoch": 1.73, "percentage": 86.64, "elapsed_time": "1 day, 18:28:11", "remaining_time": "6:32:50"} +{"current_steps": 24490, "total_steps": 28254, "loss": 0.7259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1679260031582575e-06, "epoch": 1.73, "percentage": 86.68, "elapsed_time": "1 day, 18:29:15", "remaining_time": "6:31:48"} +{"current_steps": 24500, "total_steps": 28254, "loss": 0.6953, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1566173736495663e-06, "epoch": 1.73, "percentage": 86.71, "elapsed_time": "1 day, 18:30:18", "remaining_time": "6:30:46"} +{"current_steps": 24510, "total_steps": 28254, "loss": 0.7024, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1453369864520125e-06, "epoch": 1.73, "percentage": 86.75, "elapsed_time": "1 day, 18:31:20", "remaining_time": "6:29:43"} +{"current_steps": 24520, "total_steps": 28254, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.134084855512056e-06, "epoch": 1.74, "percentage": 86.78, "elapsed_time": "1 day, 18:32:22", "remaining_time": "6:28:41"} +{"current_steps": 24530, "total_steps": 28254, "loss": 0.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1228609947412074e-06, "epoch": 1.74, "percentage": 86.82, "elapsed_time": "1 day, 18:33:23", "remaining_time": "6:27:38"} +{"current_steps": 24540, "total_steps": 28254, "loss": 0.7137, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.111665418016051e-06, "epoch": 1.74, "percentage": 86.85, "elapsed_time": "1 day, 18:34:25", "remaining_time": "6:26:35"} +{"current_steps": 24550, "total_steps": 28254, "loss": 0.7219, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1004981391781697e-06, "epoch": 1.74, "percentage": 86.89, "elapsed_time": "1 day, 18:35:25", "remaining_time": "6:25:33"} +{"current_steps": 24560, "total_steps": 28254, "loss": 0.7267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0893591720341888e-06, "epoch": 1.74, "percentage": 86.93, "elapsed_time": "1 day, 18:36:26", "remaining_time": "6:24:30"} +{"current_steps": 24570, "total_steps": 28254, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0782485303557144e-06, "epoch": 1.74, "percentage": 86.96, "elapsed_time": "1 day, 18:37:31", "remaining_time": "6:23:28"} +{"current_steps": 24580, "total_steps": 28254, "loss": 0.7075, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0671662278793387e-06, "epoch": 1.74, "percentage": 87.0, "elapsed_time": "1 day, 18:38:35", "remaining_time": "6:22:26"} +{"current_steps": 24590, "total_steps": 28254, "loss": 0.6794, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0561122783066128e-06, "epoch": 1.74, "percentage": 87.03, "elapsed_time": "1 day, 18:39:38", "remaining_time": "6:21:23"} +{"current_steps": 24600, "total_steps": 28254, "loss": 0.7195, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0450866953040436e-06, "epoch": 1.74, "percentage": 87.07, "elapsed_time": "1 day, 18:40:39", "remaining_time": "6:20:21"} +{"current_steps": 24610, "total_steps": 28254, "loss": 0.7097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.034089492503052e-06, "epoch": 1.74, "percentage": 87.1, "elapsed_time": "1 day, 18:41:43", "remaining_time": "6:19:18"} +{"current_steps": 24620, "total_steps": 28254, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0231206834999793e-06, "epoch": 1.74, "percentage": 87.14, "elapsed_time": "1 day, 18:42:44", "remaining_time": "6:18:16"} +{"current_steps": 24630, "total_steps": 28254, "loss": 0.7295, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0121802818560716e-06, "epoch": 1.74, "percentage": 87.17, "elapsed_time": "1 day, 18:43:46", "remaining_time": "6:17:13"} +{"current_steps": 24640, "total_steps": 28254, "loss": 0.7161, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0012683010974366e-06, "epoch": 1.74, "percentage": 87.21, "elapsed_time": "1 day, 18:44:51", "remaining_time": "6:16:11"} +{"current_steps": 24650, "total_steps": 28254, "loss": 0.6945, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9903847547150495e-06, "epoch": 1.74, "percentage": 87.24, "elapsed_time": "1 day, 18:45:52", "remaining_time": "6:15:08"} +{"current_steps": 24660, "total_steps": 28254, "loss": 0.7025, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.979529656164736e-06, "epoch": 1.75, "percentage": 87.28, "elapsed_time": "1 day, 18:46:54", "remaining_time": "6:14:06"} +{"current_steps": 24670, "total_steps": 28254, "loss": 0.7047, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9687030188671517e-06, "epoch": 1.75, "percentage": 87.32, "elapsed_time": "1 day, 18:47:55", "remaining_time": "6:13:03"} +{"current_steps": 24680, "total_steps": 28254, "loss": 0.6837, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.957904856207754e-06, "epoch": 1.75, "percentage": 87.35, "elapsed_time": "1 day, 18:48:56", "remaining_time": "6:12:01"} +{"current_steps": 24690, "total_steps": 28254, "loss": 0.7309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9471351815367995e-06, "epoch": 1.75, "percentage": 87.39, "elapsed_time": "1 day, 18:49:58", "remaining_time": "6:10:58"} +{"current_steps": 24700, "total_steps": 28254, "loss": 0.7112, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9363940081693287e-06, "epoch": 1.75, "percentage": 87.42, "elapsed_time": "1 day, 18:51:00", "remaining_time": "6:09:56"} +{"current_steps": 24710, "total_steps": 28254, "loss": 0.698, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.92568134938515e-06, "epoch": 1.75, "percentage": 87.46, "elapsed_time": "1 day, 18:52:04", "remaining_time": "6:08:53"} +{"current_steps": 24720, "total_steps": 28254, "loss": 0.7365, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.914997218428796e-06, "epoch": 1.75, "percentage": 87.49, "elapsed_time": "1 day, 18:53:06", "remaining_time": "6:07:51"} +{"current_steps": 24730, "total_steps": 28254, "loss": 0.6988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9043416285095568e-06, "epoch": 1.75, "percentage": 87.53, "elapsed_time": "1 day, 18:54:10", "remaining_time": "6:06:49"} +{"current_steps": 24740, "total_steps": 28254, "loss": 0.7022, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8937145928014073e-06, "epoch": 1.75, "percentage": 87.56, "elapsed_time": "1 day, 18:55:11", "remaining_time": "6:05:46"} +{"current_steps": 24750, "total_steps": 28254, "loss": 0.7207, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.883116124443049e-06, "epoch": 1.75, "percentage": 87.6, "elapsed_time": "1 day, 18:56:12", "remaining_time": "6:04:43"} +{"current_steps": 24760, "total_steps": 28254, "loss": 0.7202, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8725462365378366e-06, "epoch": 1.75, "percentage": 87.63, "elapsed_time": "1 day, 18:57:14", "remaining_time": "6:03:41"} +{"current_steps": 24770, "total_steps": 28254, "loss": 0.7263, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.862004942153811e-06, "epoch": 1.75, "percentage": 87.67, "elapsed_time": "1 day, 18:58:15", "remaining_time": "6:02:38"} +{"current_steps": 24780, "total_steps": 28254, "loss": 0.7123, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.85149225432365e-06, "epoch": 1.75, "percentage": 87.7, "elapsed_time": "1 day, 18:59:18", "remaining_time": "6:01:36"} +{"current_steps": 24790, "total_steps": 28254, "loss": 0.7025, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8410081860446682e-06, "epoch": 1.75, "percentage": 87.74, "elapsed_time": "1 day, 19:00:21", "remaining_time": "6:00:33"} +{"current_steps": 24800, "total_steps": 28254, "loss": 0.7225, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8305527502787912e-06, "epoch": 1.76, "percentage": 87.78, "elapsed_time": "1 day, 19:01:24", "remaining_time": "5:59:31"} +{"current_steps": 24810, "total_steps": 28254, "loss": 0.6956, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8201259599525567e-06, "epoch": 1.76, "percentage": 87.81, "elapsed_time": "1 day, 19:02:27", "remaining_time": "5:58:28"} +{"current_steps": 24820, "total_steps": 28254, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8097278279570696e-06, "epoch": 1.76, "percentage": 87.85, "elapsed_time": "1 day, 19:03:28", "remaining_time": "5:57:26"} +{"current_steps": 24830, "total_steps": 28254, "loss": 0.6895, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7993583671480213e-06, "epoch": 1.76, "percentage": 87.88, "elapsed_time": "1 day, 19:04:31", "remaining_time": "5:56:24"} +{"current_steps": 24840, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7890175903456403e-06, "epoch": 1.76, "percentage": 87.92, "elapsed_time": "1 day, 19:05:35", "remaining_time": "5:55:21"} +{"current_steps": 24850, "total_steps": 28254, "loss": 0.7165, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.778705510334705e-06, "epoch": 1.76, "percentage": 87.95, "elapsed_time": "1 day, 19:06:38", "remaining_time": "5:54:19"} +{"current_steps": 24860, "total_steps": 28254, "loss": 0.6951, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7684221398645007e-06, "epoch": 1.76, "percentage": 87.99, "elapsed_time": "1 day, 19:07:40", "remaining_time": "5:53:16"} +{"current_steps": 24870, "total_steps": 28254, "loss": 0.6925, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7581674916488267e-06, "epoch": 1.76, "percentage": 88.02, "elapsed_time": "1 day, 19:08:43", "remaining_time": "5:52:14"} +{"current_steps": 24880, "total_steps": 28254, "loss": 0.724, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7479415783659774e-06, "epoch": 1.76, "percentage": 88.06, "elapsed_time": "1 day, 19:09:43", "remaining_time": "5:51:11"} +{"current_steps": 24890, "total_steps": 28254, "loss": 0.6585, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7377444126587094e-06, "epoch": 1.76, "percentage": 88.09, "elapsed_time": "1 day, 19:10:45", "remaining_time": "5:50:09"} +{"current_steps": 24900, "total_steps": 28254, "loss": 0.7301, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7275760071342379e-06, "epoch": 1.76, "percentage": 88.13, "elapsed_time": "1 day, 19:11:48", "remaining_time": "5:49:06"} +{"current_steps": 24910, "total_steps": 28254, "loss": 0.6777, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7174363743642257e-06, "epoch": 1.76, "percentage": 88.16, "elapsed_time": "1 day, 19:12:52", "remaining_time": "5:48:04"} +{"current_steps": 24920, "total_steps": 28254, "loss": 0.6788, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7073255268847676e-06, "epoch": 1.76, "percentage": 88.2, "elapsed_time": "1 day, 19:13:52", "remaining_time": "5:47:01"} +{"current_steps": 24930, "total_steps": 28254, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6972434771963586e-06, "epoch": 1.76, "percentage": 88.24, "elapsed_time": "1 day, 19:14:53", "remaining_time": "5:45:59"} +{"current_steps": 24940, "total_steps": 28254, "loss": 0.6847, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6871902377638915e-06, "epoch": 1.77, "percentage": 88.27, "elapsed_time": "1 day, 19:15:56", "remaining_time": "5:44:56"} +{"current_steps": 24950, "total_steps": 28254, "loss": 0.7232, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6771658210166436e-06, "epoch": 1.77, "percentage": 88.31, "elapsed_time": "1 day, 19:16:58", "remaining_time": "5:43:54"} +{"current_steps": 24960, "total_steps": 28254, "loss": 0.7024, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6671702393482646e-06, "epoch": 1.77, "percentage": 88.34, "elapsed_time": "1 day, 19:18:00", "remaining_time": "5:42:51"} +{"current_steps": 24970, "total_steps": 28254, "loss": 0.674, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.657203505116739e-06, "epoch": 1.77, "percentage": 88.38, "elapsed_time": "1 day, 19:19:03", "remaining_time": "5:41:49"} +{"current_steps": 24980, "total_steps": 28254, "loss": 0.7264, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.647265630644393e-06, "epoch": 1.77, "percentage": 88.41, "elapsed_time": "1 day, 19:20:07", "remaining_time": "5:40:47"} +{"current_steps": 24990, "total_steps": 28254, "loss": 0.703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6373566282178704e-06, "epoch": 1.77, "percentage": 88.45, "elapsed_time": "1 day, 19:21:08", "remaining_time": "5:39:44"} +{"current_steps": 25000, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6274765100881272e-06, "epoch": 1.77, "percentage": 88.48, "elapsed_time": "1 day, 19:22:11", "remaining_time": "5:38:42"} +{"current_steps": 25010, "total_steps": 28254, "loss": 0.7126, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6176252884703924e-06, "epoch": 1.77, "percentage": 88.52, "elapsed_time": "1 day, 19:23:13", "remaining_time": "5:37:39"} +{"current_steps": 25020, "total_steps": 28254, "loss": 0.7127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6078029755441926e-06, "epoch": 1.77, "percentage": 88.55, "elapsed_time": "1 day, 19:24:15", "remaining_time": "5:36:37"} +{"current_steps": 25030, "total_steps": 28254, "loss": 0.7084, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5980095834532832e-06, "epoch": 1.77, "percentage": 88.59, "elapsed_time": "1 day, 19:25:17", "remaining_time": "5:35:34"} +{"current_steps": 25040, "total_steps": 28254, "loss": 0.6787, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5882451243056928e-06, "epoch": 1.77, "percentage": 88.62, "elapsed_time": "1 day, 19:26:19", "remaining_time": "5:34:32"} +{"current_steps": 25050, "total_steps": 28254, "loss": 0.7314, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.578509610173659e-06, "epoch": 1.77, "percentage": 88.66, "elapsed_time": "1 day, 19:27:21", "remaining_time": "5:33:29"} +{"current_steps": 25060, "total_steps": 28254, "loss": 0.7144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5688030530936425e-06, "epoch": 1.77, "percentage": 88.7, "elapsed_time": "1 day, 19:28:23", "remaining_time": "5:32:27"} +{"current_steps": 25070, "total_steps": 28254, "loss": 0.7208, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.559125465066294e-06, "epoch": 1.77, "percentage": 88.73, "elapsed_time": "1 day, 19:29:27", "remaining_time": "5:31:24"} +{"current_steps": 25080, "total_steps": 28254, "loss": 0.7474, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5494768580564618e-06, "epoch": 1.78, "percentage": 88.77, "elapsed_time": "1 day, 19:30:29", "remaining_time": "5:30:22"} +{"current_steps": 25090, "total_steps": 28254, "loss": 0.7082, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5398572439931591e-06, "epoch": 1.78, "percentage": 88.8, "elapsed_time": "1 day, 19:31:32", "remaining_time": "5:29:19"} +{"current_steps": 25100, "total_steps": 28254, "loss": 0.7083, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5302666347695477e-06, "epoch": 1.78, "percentage": 88.84, "elapsed_time": "1 day, 19:32:35", "remaining_time": "5:28:17"} +{"current_steps": 25110, "total_steps": 28254, "loss": 0.7155, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5207050422429286e-06, "epoch": 1.78, "percentage": 88.87, "elapsed_time": "1 day, 19:33:36", "remaining_time": "5:27:14"} +{"current_steps": 25120, "total_steps": 28254, "loss": 0.6739, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5111724782347375e-06, "epoch": 1.78, "percentage": 88.91, "elapsed_time": "1 day, 19:34:42", "remaining_time": "5:26:12"} +{"current_steps": 25130, "total_steps": 28254, "loss": 0.7285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.501668954530519e-06, "epoch": 1.78, "percentage": 88.94, "elapsed_time": "1 day, 19:35:44", "remaining_time": "5:25:10"} +{"current_steps": 25140, "total_steps": 28254, "loss": 0.6637, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4921944828799105e-06, "epoch": 1.78, "percentage": 88.98, "elapsed_time": "1 day, 19:36:45", "remaining_time": "5:24:07"} +{"current_steps": 25150, "total_steps": 28254, "loss": 0.6976, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4827490749966227e-06, "epoch": 1.78, "percentage": 89.01, "elapsed_time": "1 day, 19:37:49", "remaining_time": "5:23:05"} +{"current_steps": 25160, "total_steps": 28254, "loss": 0.7068, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4733327425584504e-06, "epoch": 1.78, "percentage": 89.05, "elapsed_time": "1 day, 19:38:51", "remaining_time": "5:22:02"} +{"current_steps": 25170, "total_steps": 28254, "loss": 0.6992, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4639454972072337e-06, "epoch": 1.78, "percentage": 89.08, "elapsed_time": "1 day, 19:39:54", "remaining_time": "5:21:00"} +{"current_steps": 25180, "total_steps": 28254, "loss": 0.694, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4545873505488471e-06, "epoch": 1.78, "percentage": 89.12, "elapsed_time": "1 day, 19:40:57", "remaining_time": "5:19:58"} +{"current_steps": 25190, "total_steps": 28254, "loss": 0.7144, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4452583141531888e-06, "epoch": 1.78, "percentage": 89.16, "elapsed_time": "1 day, 19:41:58", "remaining_time": "5:18:55"} +{"current_steps": 25200, "total_steps": 28254, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4359583995541741e-06, "epoch": 1.78, "percentage": 89.19, "elapsed_time": "1 day, 19:43:01", "remaining_time": "5:17:53"} +{"current_steps": 25210, "total_steps": 28254, "loss": 0.6898, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.426687618249714e-06, "epoch": 1.78, "percentage": 89.23, "elapsed_time": "1 day, 19:44:04", "remaining_time": "5:16:50"} +{"current_steps": 25220, "total_steps": 28254, "loss": 0.6764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4174459817016927e-06, "epoch": 1.79, "percentage": 89.26, "elapsed_time": "1 day, 19:45:06", "remaining_time": "5:15:48"} +{"current_steps": 25230, "total_steps": 28254, "loss": 0.6904, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4082335013359593e-06, "epoch": 1.79, "percentage": 89.3, "elapsed_time": "1 day, 19:46:10", "remaining_time": "5:14:46"} +{"current_steps": 25240, "total_steps": 28254, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3990501885423251e-06, "epoch": 1.79, "percentage": 89.33, "elapsed_time": "1 day, 19:47:13", "remaining_time": "5:13:43"} +{"current_steps": 25250, "total_steps": 28254, "loss": 0.6962, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3898960546745443e-06, "epoch": 1.79, "percentage": 89.37, "elapsed_time": "1 day, 19:48:16", "remaining_time": "5:12:41"} +{"current_steps": 25260, "total_steps": 28254, "loss": 0.7231, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3807711110502802e-06, "epoch": 1.79, "percentage": 89.4, "elapsed_time": "1 day, 19:49:18", "remaining_time": "5:11:38"} +{"current_steps": 25270, "total_steps": 28254, "loss": 0.7064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3716753689511142e-06, "epoch": 1.79, "percentage": 89.44, "elapsed_time": "1 day, 19:50:21", "remaining_time": "5:10:36"} +{"current_steps": 25280, "total_steps": 28254, "loss": 0.6942, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3626088396225284e-06, "epoch": 1.79, "percentage": 89.47, "elapsed_time": "1 day, 19:51:23", "remaining_time": "5:09:33"} +{"current_steps": 25290, "total_steps": 28254, "loss": 0.7291, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.353571534273887e-06, "epoch": 1.79, "percentage": 89.51, "elapsed_time": "1 day, 19:52:25", "remaining_time": "5:08:31"} +{"current_steps": 25300, "total_steps": 28254, "loss": 0.6808, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3445634640784133e-06, "epoch": 1.79, "percentage": 89.54, "elapsed_time": "1 day, 19:53:28", "remaining_time": "5:07:28"} +{"current_steps": 25310, "total_steps": 28254, "loss": 0.6962, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3355846401732015e-06, "epoch": 1.79, "percentage": 89.58, "elapsed_time": "1 day, 19:54:31", "remaining_time": "5:06:26"} +{"current_steps": 25320, "total_steps": 28254, "loss": 0.736, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3266350736591744e-06, "epoch": 1.79, "percentage": 89.62, "elapsed_time": "1 day, 19:55:31", "remaining_time": "5:05:23"} +{"current_steps": 25330, "total_steps": 28254, "loss": 0.6821, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3177147756010893e-06, "epoch": 1.79, "percentage": 89.65, "elapsed_time": "1 day, 19:56:35", "remaining_time": "5:04:21"} +{"current_steps": 25340, "total_steps": 28254, "loss": 0.6935, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3088237570275165e-06, "epoch": 1.79, "percentage": 89.69, "elapsed_time": "1 day, 19:57:38", "remaining_time": "5:03:19"} +{"current_steps": 25350, "total_steps": 28254, "loss": 0.7366, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2999620289308263e-06, "epoch": 1.79, "percentage": 89.72, "elapsed_time": "1 day, 19:58:41", "remaining_time": "5:02:16"} +{"current_steps": 25360, "total_steps": 28254, "loss": 0.7145, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2911296022671716e-06, "epoch": 1.8, "percentage": 89.76, "elapsed_time": "1 day, 19:59:43", "remaining_time": "5:01:14"} +{"current_steps": 25370, "total_steps": 28254, "loss": 0.6949, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2823264879564838e-06, "epoch": 1.8, "percentage": 89.79, "elapsed_time": "1 day, 20:00:46", "remaining_time": "5:00:11"} +{"current_steps": 25380, "total_steps": 28254, "loss": 0.7115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2735526968824575e-06, "epoch": 1.8, "percentage": 89.83, "elapsed_time": "1 day, 20:01:49", "remaining_time": "4:59:09"} +{"current_steps": 25390, "total_steps": 28254, "loss": 0.7214, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.264808239892526e-06, "epoch": 1.8, "percentage": 89.86, "elapsed_time": "1 day, 20:02:51", "remaining_time": "4:58:06"} +{"current_steps": 25400, "total_steps": 28254, "loss": 0.7128, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2560931277978526e-06, "epoch": 1.8, "percentage": 89.9, "elapsed_time": "1 day, 20:03:54", "remaining_time": "4:57:04"} +{"current_steps": 25410, "total_steps": 28254, "loss": 0.7109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2474073713733353e-06, "epoch": 1.8, "percentage": 89.93, "elapsed_time": "1 day, 20:05:00", "remaining_time": "4:56:02"} +{"current_steps": 25420, "total_steps": 28254, "loss": 0.7233, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.238750981357567e-06, "epoch": 1.8, "percentage": 89.97, "elapsed_time": "1 day, 20:06:03", "remaining_time": "4:55:00"} +{"current_steps": 25430, "total_steps": 28254, "loss": 0.7049, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2301239684528342e-06, "epoch": 1.8, "percentage": 90.0, "elapsed_time": "1 day, 20:07:03", "remaining_time": "4:53:57"} +{"current_steps": 25440, "total_steps": 28254, "loss": 0.7242, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2215263433250995e-06, "epoch": 1.8, "percentage": 90.04, "elapsed_time": "1 day, 20:08:08", "remaining_time": "4:52:55"} +{"current_steps": 25450, "total_steps": 28254, "loss": 0.6809, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2129581166040043e-06, "epoch": 1.8, "percentage": 90.08, "elapsed_time": "1 day, 20:09:11", "remaining_time": "4:51:52"} +{"current_steps": 25460, "total_steps": 28254, "loss": 0.7136, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2044192988828362e-06, "epoch": 1.8, "percentage": 90.11, "elapsed_time": "1 day, 20:10:12", "remaining_time": "4:50:50"} +{"current_steps": 25470, "total_steps": 28254, "loss": 0.6874, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1959099007185226e-06, "epoch": 1.8, "percentage": 90.15, "elapsed_time": "1 day, 20:11:15", "remaining_time": "4:49:47"} +{"current_steps": 25480, "total_steps": 28254, "loss": 0.7122, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1874299326316147e-06, "epoch": 1.8, "percentage": 90.18, "elapsed_time": "1 day, 20:12:16", "remaining_time": "4:48:45"} +{"current_steps": 25490, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1789794051062815e-06, "epoch": 1.8, "percentage": 90.22, "elapsed_time": "1 day, 20:13:19", "remaining_time": "4:47:42"} +{"current_steps": 25500, "total_steps": 28254, "loss": 0.7135, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1705583285903043e-06, "epoch": 1.8, "percentage": 90.25, "elapsed_time": "1 day, 20:14:21", "remaining_time": "4:46:40"} +{"current_steps": 25510, "total_steps": 28254, "loss": 0.6536, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.162166713495033e-06, "epoch": 1.81, "percentage": 90.29, "elapsed_time": "1 day, 20:15:22", "remaining_time": "4:45:37"} +{"current_steps": 25520, "total_steps": 28254, "loss": 0.6984, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1538045701954047e-06, "epoch": 1.81, "percentage": 90.32, "elapsed_time": "1 day, 20:16:26", "remaining_time": "4:44:35"} +{"current_steps": 25530, "total_steps": 28254, "loss": 0.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1454719090299131e-06, "epoch": 1.81, "percentage": 90.36, "elapsed_time": "1 day, 20:17:30", "remaining_time": "4:43:33"} +{"current_steps": 25540, "total_steps": 28254, "loss": 0.6937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1371687403006147e-06, "epoch": 1.81, "percentage": 90.39, "elapsed_time": "1 day, 20:18:33", "remaining_time": "4:42:30"} +{"current_steps": 25550, "total_steps": 28254, "loss": 0.7263, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1288950742730898e-06, "epoch": 1.81, "percentage": 90.43, "elapsed_time": "1 day, 20:19:36", "remaining_time": "4:41:28"} +{"current_steps": 25560, "total_steps": 28254, "loss": 0.6799, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1206509211764415e-06, "epoch": 1.81, "percentage": 90.47, "elapsed_time": "1 day, 20:20:38", "remaining_time": "4:40:25"} +{"current_steps": 25570, "total_steps": 28254, "loss": 0.6887, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1124362912032949e-06, "epoch": 1.81, "percentage": 90.5, "elapsed_time": "1 day, 20:21:40", "remaining_time": "4:39:23"} +{"current_steps": 25580, "total_steps": 28254, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.10425119450977e-06, "epoch": 1.81, "percentage": 90.54, "elapsed_time": "1 day, 20:22:45", "remaining_time": "4:38:21"} +{"current_steps": 25590, "total_steps": 28254, "loss": 0.7283, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0960956412154777e-06, "epoch": 1.81, "percentage": 90.57, "elapsed_time": "1 day, 20:23:48", "remaining_time": "4:37:18"} +{"current_steps": 25600, "total_steps": 28254, "loss": 0.7374, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.087969641403494e-06, "epoch": 1.81, "percentage": 90.61, "elapsed_time": "1 day, 20:24:48", "remaining_time": "4:36:15"} +{"current_steps": 25610, "total_steps": 28254, "loss": 0.7013, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0798732051203603e-06, "epoch": 1.81, "percentage": 90.64, "elapsed_time": "1 day, 20:25:50", "remaining_time": "4:35:13"} +{"current_steps": 25620, "total_steps": 28254, "loss": 0.6999, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0718063423760722e-06, "epoch": 1.81, "percentage": 90.68, "elapsed_time": "1 day, 20:26:53", "remaining_time": "4:34:11"} +{"current_steps": 25630, "total_steps": 28254, "loss": 0.7123, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0637690631440572e-06, "epoch": 1.81, "percentage": 90.71, "elapsed_time": "1 day, 20:27:57", "remaining_time": "4:33:08"} +{"current_steps": 25640, "total_steps": 28254, "loss": 0.6978, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0557613773611697e-06, "epoch": 1.81, "percentage": 90.75, "elapsed_time": "1 day, 20:28:59", "remaining_time": "4:32:06"} +{"current_steps": 25650, "total_steps": 28254, "loss": 0.7393, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0477832949276706e-06, "epoch": 1.82, "percentage": 90.78, "elapsed_time": "1 day, 20:30:02", "remaining_time": "4:31:03"} +{"current_steps": 25660, "total_steps": 28254, "loss": 0.727, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0398348257072282e-06, "epoch": 1.82, "percentage": 90.82, "elapsed_time": "1 day, 20:31:03", "remaining_time": "4:30:01"} +{"current_steps": 25670, "total_steps": 28254, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0319159795268984e-06, "epoch": 1.82, "percentage": 90.85, "elapsed_time": "1 day, 20:32:09", "remaining_time": "4:28:59"} +{"current_steps": 25680, "total_steps": 28254, "loss": 0.7097, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.024026766177108e-06, "epoch": 1.82, "percentage": 90.89, "elapsed_time": "1 day, 20:33:12", "remaining_time": "4:27:56"} +{"current_steps": 25690, "total_steps": 28254, "loss": 0.7109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0161671954116464e-06, "epoch": 1.82, "percentage": 90.93, "elapsed_time": "1 day, 20:34:14", "remaining_time": "4:26:54"} +{"current_steps": 25700, "total_steps": 28254, "loss": 0.7086, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0083372769476629e-06, "epoch": 1.82, "percentage": 90.96, "elapsed_time": "1 day, 20:35:18", "remaining_time": "4:25:51"} +{"current_steps": 25710, "total_steps": 28254, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0005370204656418e-06, "epoch": 1.82, "percentage": 91.0, "elapsed_time": "1 day, 20:36:20", "remaining_time": "4:24:49"} +{"current_steps": 25720, "total_steps": 28254, "loss": 0.6914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.927664356093908e-07, "epoch": 1.82, "percentage": 91.03, "elapsed_time": "1 day, 20:37:22", "remaining_time": "4:23:46"} +{"current_steps": 25730, "total_steps": 28254, "loss": 0.6929, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.850255319860362e-07, "epoch": 1.82, "percentage": 91.07, "elapsed_time": "1 day, 20:38:25", "remaining_time": "4:22:44"} +{"current_steps": 25740, "total_steps": 28254, "loss": 0.7269, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.773143191660116e-07, "epoch": 1.82, "percentage": 91.1, "elapsed_time": "1 day, 20:39:28", "remaining_time": "4:21:42"} +{"current_steps": 25750, "total_steps": 28254, "loss": 0.7243, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.696328066830378e-07, "epoch": 1.82, "percentage": 91.14, "elapsed_time": "1 day, 20:40:31", "remaining_time": "4:20:39"} +{"current_steps": 25760, "total_steps": 28254, "loss": 0.718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.61981004034121e-07, "epoch": 1.82, "percentage": 91.17, "elapsed_time": "1 day, 20:41:34", "remaining_time": "4:19:37"} +{"current_steps": 25770, "total_steps": 28254, "loss": 0.7295, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.54358920679524e-07, "epoch": 1.82, "percentage": 91.21, "elapsed_time": "1 day, 20:42:35", "remaining_time": "4:18:34"} +{"current_steps": 25780, "total_steps": 28254, "loss": 0.7101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.46766566042781e-07, "epoch": 1.82, "percentage": 91.24, "elapsed_time": "1 day, 20:43:38", "remaining_time": "4:17:32"} +{"current_steps": 25790, "total_steps": 28254, "loss": 0.7296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.392039495106642e-07, "epoch": 1.83, "percentage": 91.28, "elapsed_time": "1 day, 20:44:41", "remaining_time": "4:16:29"} +{"current_steps": 25800, "total_steps": 28254, "loss": 0.7022, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.31671080433183e-07, "epoch": 1.83, "percentage": 91.31, "elapsed_time": "1 day, 20:45:45", "remaining_time": "4:15:27"} +{"current_steps": 25810, "total_steps": 28254, "loss": 0.7167, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.241679681235572e-07, "epoch": 1.83, "percentage": 91.35, "elapsed_time": "1 day, 20:46:46", "remaining_time": "4:14:24"} +{"current_steps": 25820, "total_steps": 28254, "loss": 0.7109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.166946218582301e-07, "epoch": 1.83, "percentage": 91.39, "elapsed_time": "1 day, 20:47:47", "remaining_time": "4:13:22"} +{"current_steps": 25830, "total_steps": 28254, "loss": 0.7036, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.092510508768387e-07, "epoch": 1.83, "percentage": 91.42, "elapsed_time": "1 day, 20:48:51", "remaining_time": "4:12:20"} +{"current_steps": 25840, "total_steps": 28254, "loss": 0.7064, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.018372643822132e-07, "epoch": 1.83, "percentage": 91.46, "elapsed_time": "1 day, 20:49:54", "remaining_time": "4:11:17"} +{"current_steps": 25850, "total_steps": 28254, "loss": 0.7124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.944532715403408e-07, "epoch": 1.83, "percentage": 91.49, "elapsed_time": "1 day, 20:50:57", "remaining_time": "4:10:15"} +{"current_steps": 25860, "total_steps": 28254, "loss": 0.7441, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.87099081480397e-07, "epoch": 1.83, "percentage": 91.53, "elapsed_time": "1 day, 20:51:59", "remaining_time": "4:09:12"} +{"current_steps": 25870, "total_steps": 28254, "loss": 0.7099, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.797747032947001e-07, "epoch": 1.83, "percentage": 91.56, "elapsed_time": "1 day, 20:53:01", "remaining_time": "4:08:10"} +{"current_steps": 25880, "total_steps": 28254, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.724801460387094e-07, "epoch": 1.83, "percentage": 91.6, "elapsed_time": "1 day, 20:54:04", "remaining_time": "4:07:07"} +{"current_steps": 25890, "total_steps": 28254, "loss": 0.7032, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.652154187310218e-07, "epoch": 1.83, "percentage": 91.63, "elapsed_time": "1 day, 20:55:07", "remaining_time": "4:06:05"} +{"current_steps": 25900, "total_steps": 28254, "loss": 0.7031, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.579805303533417e-07, "epoch": 1.83, "percentage": 91.67, "elapsed_time": "1 day, 20:56:09", "remaining_time": "4:05:02"} +{"current_steps": 25910, "total_steps": 28254, "loss": 0.6833, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.507754898504943e-07, "epoch": 1.83, "percentage": 91.7, "elapsed_time": "1 day, 20:57:11", "remaining_time": "4:04:00"} +{"current_steps": 25920, "total_steps": 28254, "loss": 0.7052, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.436003061304043e-07, "epoch": 1.83, "percentage": 91.74, "elapsed_time": "1 day, 20:58:14", "remaining_time": "4:02:57"} +{"current_steps": 25930, "total_steps": 28254, "loss": 0.7, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.364549880640671e-07, "epoch": 1.84, "percentage": 91.77, "elapsed_time": "1 day, 20:59:17", "remaining_time": "4:01:55"} +{"current_steps": 25940, "total_steps": 28254, "loss": 0.7127, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.293395444855662e-07, "epoch": 1.84, "percentage": 91.81, "elapsed_time": "1 day, 21:00:18", "remaining_time": "4:00:52"} +{"current_steps": 25950, "total_steps": 28254, "loss": 0.709, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.222539841920507e-07, "epoch": 1.84, "percentage": 91.85, "elapsed_time": "1 day, 21:01:20", "remaining_time": "3:59:50"} +{"current_steps": 25960, "total_steps": 28254, "loss": 0.6866, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.151983159437215e-07, "epoch": 1.84, "percentage": 91.88, "elapsed_time": "1 day, 21:02:23", "remaining_time": "3:58:48"} +{"current_steps": 25970, "total_steps": 28254, "loss": 0.7142, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.081725484638176e-07, "epoch": 1.84, "percentage": 91.92, "elapsed_time": "1 day, 21:03:27", "remaining_time": "3:57:45"} +{"current_steps": 25980, "total_steps": 28254, "loss": 0.7077, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.011766904386154e-07, "epoch": 1.84, "percentage": 91.95, "elapsed_time": "1 day, 21:04:26", "remaining_time": "3:56:43"} +{"current_steps": 25990, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.942107505174102e-07, "epoch": 1.84, "percentage": 91.99, "elapsed_time": "1 day, 21:05:30", "remaining_time": "3:55:40"} +{"current_steps": 26000, "total_steps": 28254, "loss": 0.7148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.872747373125156e-07, "epoch": 1.84, "percentage": 92.02, "elapsed_time": "1 day, 21:06:31", "remaining_time": "3:54:38"} +{"current_steps": 26010, "total_steps": 28254, "loss": 0.7326, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.80368659399236e-07, "epoch": 1.84, "percentage": 92.06, "elapsed_time": "1 day, 21:07:33", "remaining_time": "3:53:35"} +{"current_steps": 26020, "total_steps": 28254, "loss": 0.7066, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.734925253158665e-07, "epoch": 1.84, "percentage": 92.09, "elapsed_time": "1 day, 21:08:35", "remaining_time": "3:52:33"} +{"current_steps": 26030, "total_steps": 28254, "loss": 0.6938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.666463435636873e-07, "epoch": 1.84, "percentage": 92.13, "elapsed_time": "1 day, 21:09:37", "remaining_time": "3:51:30"} +{"current_steps": 26040, "total_steps": 28254, "loss": 0.6948, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.598301226069443e-07, "epoch": 1.84, "percentage": 92.16, "elapsed_time": "1 day, 21:10:39", "remaining_time": "3:50:28"} +{"current_steps": 26050, "total_steps": 28254, "loss": 0.6797, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.53043870872841e-07, "epoch": 1.84, "percentage": 92.2, "elapsed_time": "1 day, 21:11:40", "remaining_time": "3:49:25"} +{"current_steps": 26060, "total_steps": 28254, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.462875967515242e-07, "epoch": 1.84, "percentage": 92.23, "elapsed_time": "1 day, 21:12:41", "remaining_time": "3:48:22"} +{"current_steps": 26070, "total_steps": 28254, "loss": 0.7184, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.395613085960873e-07, "epoch": 1.85, "percentage": 92.27, "elapsed_time": "1 day, 21:13:44", "remaining_time": "3:47:20"} +{"current_steps": 26080, "total_steps": 28254, "loss": 0.6855, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.328650147225419e-07, "epoch": 1.85, "percentage": 92.31, "elapsed_time": "1 day, 21:14:48", "remaining_time": "3:46:18"} +{"current_steps": 26090, "total_steps": 28254, "loss": 0.7092, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.261987234098238e-07, "epoch": 1.85, "percentage": 92.34, "elapsed_time": "1 day, 21:15:50", "remaining_time": "3:45:15"} +{"current_steps": 26100, "total_steps": 28254, "loss": 0.717, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.19562442899771e-07, "epoch": 1.85, "percentage": 92.38, "elapsed_time": "1 day, 21:16:54", "remaining_time": "3:44:13"} +{"current_steps": 26110, "total_steps": 28254, "loss": 0.7146, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.129561813971147e-07, "epoch": 1.85, "percentage": 92.41, "elapsed_time": "1 day, 21:17:55", "remaining_time": "3:43:10"} +{"current_steps": 26120, "total_steps": 28254, "loss": 0.7148, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.06379947069477e-07, "epoch": 1.85, "percentage": 92.45, "elapsed_time": "1 day, 21:18:57", "remaining_time": "3:42:08"} +{"current_steps": 26130, "total_steps": 28254, "loss": 0.688, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.998337480473599e-07, "epoch": 1.85, "percentage": 92.48, "elapsed_time": "1 day, 21:19:59", "remaining_time": "3:41:05"} +{"current_steps": 26140, "total_steps": 28254, "loss": 0.7048, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.933175924241225e-07, "epoch": 1.85, "percentage": 92.52, "elapsed_time": "1 day, 21:21:00", "remaining_time": "3:40:03"} +{"current_steps": 26150, "total_steps": 28254, "loss": 0.7307, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.868314882559845e-07, "epoch": 1.85, "percentage": 92.55, "elapsed_time": "1 day, 21:22:03", "remaining_time": "3:39:00"} +{"current_steps": 26160, "total_steps": 28254, "loss": 0.721, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.803754435620091e-07, "epoch": 1.85, "percentage": 92.59, "elapsed_time": "1 day, 21:23:06", "remaining_time": "3:37:58"} +{"current_steps": 26170, "total_steps": 28254, "loss": 0.7126, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.739494663241031e-07, "epoch": 1.85, "percentage": 92.62, "elapsed_time": "1 day, 21:24:09", "remaining_time": "3:36:56"} +{"current_steps": 26180, "total_steps": 28254, "loss": 0.695, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.675535644869891e-07, "epoch": 1.85, "percentage": 92.66, "elapsed_time": "1 day, 21:25:12", "remaining_time": "3:35:53"} +{"current_steps": 26190, "total_steps": 28254, "loss": 0.6855, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.611877459582083e-07, "epoch": 1.85, "percentage": 92.69, "elapsed_time": "1 day, 21:26:15", "remaining_time": "3:34:51"} +{"current_steps": 26200, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.548520186081181e-07, "epoch": 1.85, "percentage": 92.73, "elapsed_time": "1 day, 21:27:18", "remaining_time": "3:33:48"} +{"current_steps": 26210, "total_steps": 28254, "loss": 0.7166, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.485463902698635e-07, "epoch": 1.86, "percentage": 92.77, "elapsed_time": "1 day, 21:28:21", "remaining_time": "3:32:46"} +{"current_steps": 26220, "total_steps": 28254, "loss": 0.6708, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.42270868739378e-07, "epoch": 1.86, "percentage": 92.8, "elapsed_time": "1 day, 21:29:22", "remaining_time": "3:31:43"} +{"current_steps": 26230, "total_steps": 28254, "loss": 0.7116, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.360254617753719e-07, "epoch": 1.86, "percentage": 92.84, "elapsed_time": "1 day, 21:30:24", "remaining_time": "3:30:41"} +{"current_steps": 26240, "total_steps": 28254, "loss": 0.727, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.29810177099327e-07, "epoch": 1.86, "percentage": 92.87, "elapsed_time": "1 day, 21:31:26", "remaining_time": "3:29:38"} +{"current_steps": 26250, "total_steps": 28254, "loss": 0.7041, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.236250223954854e-07, "epoch": 1.86, "percentage": 92.91, "elapsed_time": "1 day, 21:32:31", "remaining_time": "3:28:36"} +{"current_steps": 26260, "total_steps": 28254, "loss": 0.7394, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.174700053108329e-07, "epoch": 1.86, "percentage": 92.94, "elapsed_time": "1 day, 21:33:35", "remaining_time": "3:27:34"} +{"current_steps": 26270, "total_steps": 28254, "loss": 0.696, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.113451334550934e-07, "epoch": 1.86, "percentage": 92.98, "elapsed_time": "1 day, 21:34:36", "remaining_time": "3:26:31"} +{"current_steps": 26280, "total_steps": 28254, "loss": 0.725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.052504144007265e-07, "epoch": 1.86, "percentage": 93.01, "elapsed_time": "1 day, 21:35:38", "remaining_time": "3:25:29"} +{"current_steps": 26290, "total_steps": 28254, "loss": 0.7124, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.991858556829072e-07, "epoch": 1.86, "percentage": 93.05, "elapsed_time": "1 day, 21:36:41", "remaining_time": "3:24:26"} +{"current_steps": 26300, "total_steps": 28254, "loss": 0.7376, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.93151464799535e-07, "epoch": 1.86, "percentage": 93.08, "elapsed_time": "1 day, 21:37:43", "remaining_time": "3:23:24"} +{"current_steps": 26310, "total_steps": 28254, "loss": 0.7172, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.871472492111868e-07, "epoch": 1.86, "percentage": 93.12, "elapsed_time": "1 day, 21:38:46", "remaining_time": "3:22:21"} +{"current_steps": 26320, "total_steps": 28254, "loss": 0.7413, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.811732163411549e-07, "epoch": 1.86, "percentage": 93.15, "elapsed_time": "1 day, 21:39:49", "remaining_time": "3:21:19"} +{"current_steps": 26330, "total_steps": 28254, "loss": 0.7297, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.75229373575406e-07, "epoch": 1.86, "percentage": 93.19, "elapsed_time": "1 day, 21:40:50", "remaining_time": "3:20:16"} +{"current_steps": 26340, "total_steps": 28254, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.693157282625872e-07, "epoch": 1.86, "percentage": 93.23, "elapsed_time": "1 day, 21:41:53", "remaining_time": "3:19:14"} +{"current_steps": 26350, "total_steps": 28254, "loss": 0.6955, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.634322877140025e-07, "epoch": 1.87, "percentage": 93.26, "elapsed_time": "1 day, 21:42:54", "remaining_time": "3:18:11"} +{"current_steps": 26360, "total_steps": 28254, "loss": 0.7184, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.575790592036139e-07, "epoch": 1.87, "percentage": 93.3, "elapsed_time": "1 day, 21:43:56", "remaining_time": "3:17:09"} +{"current_steps": 26370, "total_steps": 28254, "loss": 0.7002, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.517560499680358e-07, "epoch": 1.87, "percentage": 93.33, "elapsed_time": "1 day, 21:44:59", "remaining_time": "3:16:06"} +{"current_steps": 26380, "total_steps": 28254, "loss": 0.6985, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.459632672065229e-07, "epoch": 1.87, "percentage": 93.37, "elapsed_time": "1 day, 21:46:03", "remaining_time": "3:15:04"} +{"current_steps": 26390, "total_steps": 28254, "loss": 0.7, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.402007180809493e-07, "epoch": 1.87, "percentage": 93.4, "elapsed_time": "1 day, 21:47:05", "remaining_time": "3:14:02"} +{"current_steps": 26400, "total_steps": 28254, "loss": 0.6975, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.344684097158215e-07, "epoch": 1.87, "percentage": 93.44, "elapsed_time": "1 day, 21:48:07", "remaining_time": "3:12:59"} +{"current_steps": 26410, "total_steps": 28254, "loss": 0.6832, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.287663491982481e-07, "epoch": 1.87, "percentage": 93.47, "elapsed_time": "1 day, 21:49:09", "remaining_time": "3:11:57"} +{"current_steps": 26420, "total_steps": 28254, "loss": 0.689, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.230945435779483e-07, "epoch": 1.87, "percentage": 93.51, "elapsed_time": "1 day, 21:50:12", "remaining_time": "3:10:54"} +{"current_steps": 26430, "total_steps": 28254, "loss": 0.7153, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.174529998672322e-07, "epoch": 1.87, "percentage": 93.54, "elapsed_time": "1 day, 21:51:13", "remaining_time": "3:09:52"} +{"current_steps": 26440, "total_steps": 28254, "loss": 0.7424, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.118417250409957e-07, "epoch": 1.87, "percentage": 93.58, "elapsed_time": "1 day, 21:52:15", "remaining_time": "3:08:49"} +{"current_steps": 26450, "total_steps": 28254, "loss": 0.6872, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.062607260367142e-07, "epoch": 1.87, "percentage": 93.62, "elapsed_time": "1 day, 21:53:16", "remaining_time": "3:07:47"} +{"current_steps": 26460, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.007100097544293e-07, "epoch": 1.87, "percentage": 93.65, "elapsed_time": "1 day, 21:54:20", "remaining_time": "3:06:44"} +{"current_steps": 26470, "total_steps": 28254, "loss": 0.7051, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.95189583056746e-07, "epoch": 1.87, "percentage": 93.69, "elapsed_time": "1 day, 21:55:23", "remaining_time": "3:05:42"} +{"current_steps": 26480, "total_steps": 28254, "loss": 0.7067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.896994527688104e-07, "epoch": 1.87, "percentage": 93.72, "elapsed_time": "1 day, 21:56:26", "remaining_time": "3:04:39"} +{"current_steps": 26490, "total_steps": 28254, "loss": 0.7005, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.842396256783288e-07, "epoch": 1.88, "percentage": 93.76, "elapsed_time": "1 day, 21:57:28", "remaining_time": "3:03:37"} +{"current_steps": 26500, "total_steps": 28254, "loss": 0.706, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.788101085355267e-07, "epoch": 1.88, "percentage": 93.79, "elapsed_time": "1 day, 21:58:29", "remaining_time": "3:02:34"} +{"current_steps": 26510, "total_steps": 28254, "loss": 0.7267, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7341090805317346e-07, "epoch": 1.88, "percentage": 93.83, "elapsed_time": "1 day, 21:59:32", "remaining_time": "3:01:32"} +{"current_steps": 26520, "total_steps": 28254, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.68042030906532e-07, "epoch": 1.88, "percentage": 93.86, "elapsed_time": "1 day, 22:00:34", "remaining_time": "3:00:29"} +{"current_steps": 26530, "total_steps": 28254, "loss": 0.6997, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.6270348373339545e-07, "epoch": 1.88, "percentage": 93.9, "elapsed_time": "1 day, 22:01:34", "remaining_time": "2:59:27"} +{"current_steps": 26540, "total_steps": 28254, "loss": 0.7155, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.573952731340536e-07, "epoch": 1.88, "percentage": 93.93, "elapsed_time": "1 day, 22:02:38", "remaining_time": "2:58:24"} +{"current_steps": 26550, "total_steps": 28254, "loss": 0.6904, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.5211740567129014e-07, "epoch": 1.88, "percentage": 93.97, "elapsed_time": "1 day, 22:03:41", "remaining_time": "2:57:22"} +{"current_steps": 26560, "total_steps": 28254, "loss": 0.7309, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.468698878703631e-07, "epoch": 1.88, "percentage": 94.0, "elapsed_time": "1 day, 22:04:43", "remaining_time": "2:56:20"} +{"current_steps": 26570, "total_steps": 28254, "loss": 0.6915, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.416527262190218e-07, "epoch": 1.88, "percentage": 94.04, "elapsed_time": "1 day, 22:05:43", "remaining_time": "2:55:17"} +{"current_steps": 26580, "total_steps": 28254, "loss": 0.7185, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3646592716748423e-07, "epoch": 1.88, "percentage": 94.08, "elapsed_time": "1 day, 22:06:47", "remaining_time": "2:54:15"} +{"current_steps": 26590, "total_steps": 28254, "loss": 0.695, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.3130949712842093e-07, "epoch": 1.88, "percentage": 94.11, "elapsed_time": "1 day, 22:07:49", "remaining_time": "2:53:12"} +{"current_steps": 26600, "total_steps": 28254, "loss": 0.7061, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2618344247696284e-07, "epoch": 1.88, "percentage": 94.15, "elapsed_time": "1 day, 22:08:50", "remaining_time": "2:52:10"} +{"current_steps": 26610, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.210877695506793e-07, "epoch": 1.88, "percentage": 94.18, "elapsed_time": "1 day, 22:09:54", "remaining_time": "2:51:07"} +{"current_steps": 26620, "total_steps": 28254, "loss": 0.7226, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.1602248464958914e-07, "epoch": 1.88, "percentage": 94.22, "elapsed_time": "1 day, 22:11:00", "remaining_time": "2:50:05"} +{"current_steps": 26630, "total_steps": 28254, "loss": 0.6967, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.109875940361302e-07, "epoch": 1.88, "percentage": 94.25, "elapsed_time": "1 day, 22:12:05", "remaining_time": "2:49:03"} +{"current_steps": 26640, "total_steps": 28254, "loss": 0.6913, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.059831039351675e-07, "epoch": 1.89, "percentage": 94.29, "elapsed_time": "1 day, 22:13:07", "remaining_time": "2:48:00"} +{"current_steps": 26650, "total_steps": 28254, "loss": 0.6682, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.010090205339767e-07, "epoch": 1.89, "percentage": 94.32, "elapsed_time": "1 day, 22:14:12", "remaining_time": "2:46:58"} +{"current_steps": 26660, "total_steps": 28254, "loss": 0.7328, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.960653499822442e-07, "epoch": 1.89, "percentage": 94.36, "elapsed_time": "1 day, 22:15:15", "remaining_time": "2:45:55"} +{"current_steps": 26670, "total_steps": 28254, "loss": 0.7028, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.911520983920558e-07, "epoch": 1.89, "percentage": 94.39, "elapsed_time": "1 day, 22:16:16", "remaining_time": "2:44:53"} +{"current_steps": 26680, "total_steps": 28254, "loss": 0.6989, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.862692718378885e-07, "epoch": 1.89, "percentage": 94.43, "elapsed_time": "1 day, 22:17:18", "remaining_time": "2:43:50"} +{"current_steps": 26690, "total_steps": 28254, "loss": 0.6951, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.8141687635659953e-07, "epoch": 1.89, "percentage": 94.46, "elapsed_time": "1 day, 22:18:18", "remaining_time": "2:42:48"} +{"current_steps": 26700, "total_steps": 28254, "loss": 0.7256, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7659491794742595e-07, "epoch": 1.89, "percentage": 94.5, "elapsed_time": "1 day, 22:19:21", "remaining_time": "2:41:45"} +{"current_steps": 26710, "total_steps": 28254, "loss": 0.6671, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.718034025719769e-07, "epoch": 1.89, "percentage": 94.54, "elapsed_time": "1 day, 22:20:21", "remaining_time": "2:40:43"} +{"current_steps": 26720, "total_steps": 28254, "loss": 0.7007, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.670423361542191e-07, "epoch": 1.89, "percentage": 94.57, "elapsed_time": "1 day, 22:21:24", "remaining_time": "2:39:40"} +{"current_steps": 26730, "total_steps": 28254, "loss": 0.6817, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.623117245804719e-07, "epoch": 1.89, "percentage": 94.61, "elapsed_time": "1 day, 22:22:29", "remaining_time": "2:38:38"} +{"current_steps": 26740, "total_steps": 28254, "loss": 0.7201, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.576115736994096e-07, "epoch": 1.89, "percentage": 94.64, "elapsed_time": "1 day, 22:23:30", "remaining_time": "2:37:36"} +{"current_steps": 26750, "total_steps": 28254, "loss": 0.7237, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.5294188932203653e-07, "epoch": 1.89, "percentage": 94.68, "elapsed_time": "1 day, 22:24:32", "remaining_time": "2:36:33"} +{"current_steps": 26760, "total_steps": 28254, "loss": 0.6799, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4830267722170684e-07, "epoch": 1.89, "percentage": 94.71, "elapsed_time": "1 day, 22:25:37", "remaining_time": "2:35:31"} +{"current_steps": 26770, "total_steps": 28254, "loss": 0.7261, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.436939431340769e-07, "epoch": 1.89, "percentage": 94.75, "elapsed_time": "1 day, 22:26:38", "remaining_time": "2:34:28"} +{"current_steps": 26780, "total_steps": 28254, "loss": 0.703, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3911569275713594e-07, "epoch": 1.9, "percentage": 94.78, "elapsed_time": "1 day, 22:27:41", "remaining_time": "2:33:26"} +{"current_steps": 26790, "total_steps": 28254, "loss": 0.6719, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3456793175118685e-07, "epoch": 1.9, "percentage": 94.82, "elapsed_time": "1 day, 22:28:43", "remaining_time": "2:32:23"} +{"current_steps": 26800, "total_steps": 28254, "loss": 0.6846, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.300506657388347e-07, "epoch": 1.9, "percentage": 94.85, "elapsed_time": "1 day, 22:29:45", "remaining_time": "2:31:21"} +{"current_steps": 26810, "total_steps": 28254, "loss": 0.6572, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.25563900304976e-07, "epoch": 1.9, "percentage": 94.89, "elapsed_time": "1 day, 22:30:48", "remaining_time": "2:30:18"} +{"current_steps": 26820, "total_steps": 28254, "loss": 0.722, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.211076409968039e-07, "epoch": 1.9, "percentage": 94.92, "elapsed_time": "1 day, 22:31:51", "remaining_time": "2:29:16"} +{"current_steps": 26830, "total_steps": 28254, "loss": 0.7193, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1668189332379194e-07, "epoch": 1.9, "percentage": 94.96, "elapsed_time": "1 day, 22:32:53", "remaining_time": "2:28:13"} +{"current_steps": 26840, "total_steps": 28254, "loss": 0.7138, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.1228666275769925e-07, "epoch": 1.9, "percentage": 95.0, "elapsed_time": "1 day, 22:33:57", "remaining_time": "2:27:11"} +{"current_steps": 26850, "total_steps": 28254, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.079219547325429e-07, "epoch": 1.9, "percentage": 95.03, "elapsed_time": "1 day, 22:34:59", "remaining_time": "2:26:09"} +{"current_steps": 26860, "total_steps": 28254, "loss": 0.6791, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.03587774644612e-07, "epoch": 1.9, "percentage": 95.07, "elapsed_time": "1 day, 22:36:02", "remaining_time": "2:25:06"} +{"current_steps": 26870, "total_steps": 28254, "loss": 0.6882, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9928412785244777e-07, "epoch": 1.9, "percentage": 95.1, "elapsed_time": "1 day, 22:37:03", "remaining_time": "2:24:04"} +{"current_steps": 26880, "total_steps": 28254, "loss": 0.7039, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.950110196768496e-07, "epoch": 1.9, "percentage": 95.14, "elapsed_time": "1 day, 22:38:05", "remaining_time": "2:23:01"} +{"current_steps": 26890, "total_steps": 28254, "loss": 0.6818, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9076845540084993e-07, "epoch": 1.9, "percentage": 95.17, "elapsed_time": "1 day, 22:39:07", "remaining_time": "2:21:59"} +{"current_steps": 26900, "total_steps": 28254, "loss": 0.7056, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8655644026972514e-07, "epoch": 1.9, "percentage": 95.21, "elapsed_time": "1 day, 22:40:09", "remaining_time": "2:20:56"} +{"current_steps": 26910, "total_steps": 28254, "loss": 0.7029, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.823749794909819e-07, "epoch": 1.9, "percentage": 95.24, "elapsed_time": "1 day, 22:41:13", "remaining_time": "2:19:54"} +{"current_steps": 26920, "total_steps": 28254, "loss": 0.7107, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.782240782343487e-07, "epoch": 1.91, "percentage": 95.28, "elapsed_time": "1 day, 22:42:17", "remaining_time": "2:18:51"} +{"current_steps": 26930, "total_steps": 28254, "loss": 0.7263, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.741037416317732e-07, "epoch": 1.91, "percentage": 95.31, "elapsed_time": "1 day, 22:43:19", "remaining_time": "2:17:49"} +{"current_steps": 26940, "total_steps": 28254, "loss": 0.6971, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.700139747774166e-07, "epoch": 1.91, "percentage": 95.35, "elapsed_time": "1 day, 22:44:21", "remaining_time": "2:16:46"} +{"current_steps": 26950, "total_steps": 28254, "loss": 0.7139, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6595478272763985e-07, "epoch": 1.91, "percentage": 95.38, "elapsed_time": "1 day, 22:45:23", "remaining_time": "2:15:44"} +{"current_steps": 26960, "total_steps": 28254, "loss": 0.6651, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6192617050100897e-07, "epoch": 1.91, "percentage": 95.42, "elapsed_time": "1 day, 22:46:25", "remaining_time": "2:14:42"} +{"current_steps": 26970, "total_steps": 28254, "loss": 0.7181, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.579281430782815e-07, "epoch": 1.91, "percentage": 95.46, "elapsed_time": "1 day, 22:47:26", "remaining_time": "2:13:39"} +{"current_steps": 26980, "total_steps": 28254, "loss": 0.6995, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.539607054023979e-07, "epoch": 1.91, "percentage": 95.49, "elapsed_time": "1 day, 22:48:28", "remaining_time": "2:12:37"} +{"current_steps": 26990, "total_steps": 28254, "loss": 0.699, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.50023862378479e-07, "epoch": 1.91, "percentage": 95.53, "elapsed_time": "1 day, 22:49:30", "remaining_time": "2:11:34"} +{"current_steps": 27000, "total_steps": 28254, "loss": 0.718, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4611761887382844e-07, "epoch": 1.91, "percentage": 95.56, "elapsed_time": "1 day, 22:50:34", "remaining_time": "2:10:32"} +{"current_steps": 27010, "total_steps": 28254, "loss": 0.6785, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4224197971791094e-07, "epoch": 1.91, "percentage": 95.6, "elapsed_time": "1 day, 22:51:36", "remaining_time": "2:09:29"} +{"current_steps": 27020, "total_steps": 28254, "loss": 0.7222, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3839694970234917e-07, "epoch": 1.91, "percentage": 95.63, "elapsed_time": "1 day, 22:52:37", "remaining_time": "2:08:27"} +{"current_steps": 27030, "total_steps": 28254, "loss": 0.6785, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3458253358093485e-07, "epoch": 1.91, "percentage": 95.67, "elapsed_time": "1 day, 22:53:39", "remaining_time": "2:07:24"} +{"current_steps": 27040, "total_steps": 28254, "loss": 0.6973, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.3079873606960122e-07, "epoch": 1.91, "percentage": 95.7, "elapsed_time": "1 day, 22:54:42", "remaining_time": "2:06:22"} +{"current_steps": 27050, "total_steps": 28254, "loss": 0.7146, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.27045561846434e-07, "epoch": 1.91, "percentage": 95.74, "elapsed_time": "1 day, 22:55:44", "remaining_time": "2:05:19"} +{"current_steps": 27060, "total_steps": 28254, "loss": 0.7284, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.2332301555164638e-07, "epoch": 1.92, "percentage": 95.77, "elapsed_time": "1 day, 22:56:46", "remaining_time": "2:04:17"} +{"current_steps": 27070, "total_steps": 28254, "loss": 0.702, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1963110178759017e-07, "epoch": 1.92, "percentage": 95.81, "elapsed_time": "1 day, 22:57:46", "remaining_time": "2:03:14"} +{"current_steps": 27080, "total_steps": 28254, "loss": 0.7291, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.159698251187531e-07, "epoch": 1.92, "percentage": 95.84, "elapsed_time": "1 day, 22:58:49", "remaining_time": "2:02:12"} +{"current_steps": 27090, "total_steps": 28254, "loss": 0.7087, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1233919007173086e-07, "epoch": 1.92, "percentage": 95.88, "elapsed_time": "1 day, 22:59:53", "remaining_time": "2:01:09"} +{"current_steps": 27100, "total_steps": 28254, "loss": 0.7054, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0873920113524958e-07, "epoch": 1.92, "percentage": 95.92, "elapsed_time": "1 day, 23:00:55", "remaining_time": "2:00:07"} +{"current_steps": 27110, "total_steps": 28254, "loss": 0.6835, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0516986276013227e-07, "epoch": 1.92, "percentage": 95.95, "elapsed_time": "1 day, 23:01:58", "remaining_time": "1:59:04"} +{"current_steps": 27120, "total_steps": 28254, "loss": 0.7169, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0163117935931564e-07, "epoch": 1.92, "percentage": 95.99, "elapsed_time": "1 day, 23:03:02", "remaining_time": "1:58:02"} +{"current_steps": 27130, "total_steps": 28254, "loss": 0.6897, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9812315530783886e-07, "epoch": 1.92, "percentage": 96.02, "elapsed_time": "1 day, 23:04:04", "remaining_time": "1:57:00"} +{"current_steps": 27140, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.9464579494282708e-07, "epoch": 1.92, "percentage": 96.06, "elapsed_time": "1 day, 23:05:08", "remaining_time": "1:55:57"} +{"current_steps": 27150, "total_steps": 28254, "loss": 0.7111, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.911991025634996e-07, "epoch": 1.92, "percentage": 96.09, "elapsed_time": "1 day, 23:06:09", "remaining_time": "1:54:55"} +{"current_steps": 27160, "total_steps": 28254, "loss": 0.7009, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8778308243115884e-07, "epoch": 1.92, "percentage": 96.13, "elapsed_time": "1 day, 23:07:10", "remaining_time": "1:53:52"} +{"current_steps": 27170, "total_steps": 28254, "loss": 0.6859, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8439773876918753e-07, "epoch": 1.92, "percentage": 96.16, "elapsed_time": "1 day, 23:08:13", "remaining_time": "1:52:50"} +{"current_steps": 27180, "total_steps": 28254, "loss": 0.7101, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.810430757630377e-07, "epoch": 1.92, "percentage": 96.2, "elapsed_time": "1 day, 23:09:17", "remaining_time": "1:51:47"} +{"current_steps": 27190, "total_steps": 28254, "loss": 0.6874, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7771909756023054e-07, "epoch": 1.92, "percentage": 96.23, "elapsed_time": "1 day, 23:10:21", "remaining_time": "1:50:45"} +{"current_steps": 27200, "total_steps": 28254, "loss": 0.7033, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7442580827035094e-07, "epoch": 1.93, "percentage": 96.27, "elapsed_time": "1 day, 23:11:23", "remaining_time": "1:49:42"} +{"current_steps": 27210, "total_steps": 28254, "loss": 0.66, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7116321196504749e-07, "epoch": 1.93, "percentage": 96.3, "elapsed_time": "1 day, 23:12:26", "remaining_time": "1:48:40"} +{"current_steps": 27220, "total_steps": 28254, "loss": 0.7304, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.679313126780102e-07, "epoch": 1.93, "percentage": 96.34, "elapsed_time": "1 day, 23:13:27", "remaining_time": "1:47:38"} +{"current_steps": 27230, "total_steps": 28254, "loss": 0.6865, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6473011440498454e-07, "epoch": 1.93, "percentage": 96.38, "elapsed_time": "1 day, 23:14:31", "remaining_time": "1:46:35"} +{"current_steps": 27240, "total_steps": 28254, "loss": 0.6943, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.615596211037601e-07, "epoch": 1.93, "percentage": 96.41, "elapsed_time": "1 day, 23:15:35", "remaining_time": "1:45:33"} +{"current_steps": 27250, "total_steps": 28254, "loss": 0.6896, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5841983669415695e-07, "epoch": 1.93, "percentage": 96.45, "elapsed_time": "1 day, 23:16:40", "remaining_time": "1:44:30"} +{"current_steps": 27260, "total_steps": 28254, "loss": 0.7158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5531076505803655e-07, "epoch": 1.93, "percentage": 96.48, "elapsed_time": "1 day, 23:17:43", "remaining_time": "1:43:28"} +{"current_steps": 27270, "total_steps": 28254, "loss": 0.7259, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5223241003928524e-07, "epoch": 1.93, "percentage": 96.52, "elapsed_time": "1 day, 23:18:44", "remaining_time": "1:42:25"} +{"current_steps": 27280, "total_steps": 28254, "loss": 0.715, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.491847754438086e-07, "epoch": 1.93, "percentage": 96.55, "elapsed_time": "1 day, 23:19:49", "remaining_time": "1:41:23"} +{"current_steps": 27290, "total_steps": 28254, "loss": 0.6879, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.461678650395426e-07, "epoch": 1.93, "percentage": 96.59, "elapsed_time": "1 day, 23:20:53", "remaining_time": "1:40:21"} +{"current_steps": 27300, "total_steps": 28254, "loss": 0.6827, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4318168255642585e-07, "epoch": 1.93, "percentage": 96.62, "elapsed_time": "1 day, 23:21:58", "remaining_time": "1:39:18"} +{"current_steps": 27310, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4022623168641336e-07, "epoch": 1.93, "percentage": 96.66, "elapsed_time": "1 day, 23:23:00", "remaining_time": "1:38:16"} +{"current_steps": 27320, "total_steps": 28254, "loss": 0.692, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3730151608346285e-07, "epoch": 1.93, "percentage": 96.69, "elapsed_time": "1 day, 23:24:02", "remaining_time": "1:37:13"} +{"current_steps": 27330, "total_steps": 28254, "loss": 0.7285, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.344075393635319e-07, "epoch": 1.93, "percentage": 96.73, "elapsed_time": "1 day, 23:25:06", "remaining_time": "1:36:11"} +{"current_steps": 27340, "total_steps": 28254, "loss": 0.7337, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3154430510457506e-07, "epoch": 1.94, "percentage": 96.77, "elapsed_time": "1 day, 23:26:10", "remaining_time": "1:35:09"} +{"current_steps": 27350, "total_steps": 28254, "loss": 0.6814, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2871181684654399e-07, "epoch": 1.94, "percentage": 96.8, "elapsed_time": "1 day, 23:27:12", "remaining_time": "1:34:06"} +{"current_steps": 27360, "total_steps": 28254, "loss": 0.7109, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2591007809136247e-07, "epoch": 1.94, "percentage": 96.84, "elapsed_time": "1 day, 23:28:14", "remaining_time": "1:33:04"} +{"current_steps": 27370, "total_steps": 28254, "loss": 0.6875, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2313909230295684e-07, "epoch": 1.94, "percentage": 96.87, "elapsed_time": "1 day, 23:29:16", "remaining_time": "1:32:01"} +{"current_steps": 27380, "total_steps": 28254, "loss": 0.6764, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2039886290722003e-07, "epoch": 1.94, "percentage": 96.91, "elapsed_time": "1 day, 23:30:21", "remaining_time": "1:30:59"} +{"current_steps": 27390, "total_steps": 28254, "loss": 0.6924, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1768939329201978e-07, "epoch": 1.94, "percentage": 96.94, "elapsed_time": "1 day, 23:31:22", "remaining_time": "1:29:56"} +{"current_steps": 27400, "total_steps": 28254, "loss": 0.7136, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1501068680719595e-07, "epoch": 1.94, "percentage": 96.98, "elapsed_time": "1 day, 23:32:25", "remaining_time": "1:28:54"} +{"current_steps": 27410, "total_steps": 28254, "loss": 0.7331, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1236274676456049e-07, "epoch": 1.94, "percentage": 97.01, "elapsed_time": "1 day, 23:33:29", "remaining_time": "1:27:51"} +{"current_steps": 27420, "total_steps": 28254, "loss": 0.737, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1000590874080952e-07, "epoch": 1.94, "percentage": 97.05, "elapsed_time": "1 day, 23:34:32", "remaining_time": "1:26:49"} +{"current_steps": 27430, "total_steps": 28254, "loss": 0.6887, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0741643392611355e-07, "epoch": 1.94, "percentage": 97.08, "elapsed_time": "1 day, 23:35:35", "remaining_time": "1:25:46"} +{"current_steps": 27440, "total_steps": 28254, "loss": 0.7165, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0485773494272321e-07, "epoch": 1.94, "percentage": 97.12, "elapsed_time": "1 day, 23:36:39", "remaining_time": "1:24:44"} +{"current_steps": 27450, "total_steps": 28254, "loss": 0.6757, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.023298149540719e-07, "epoch": 1.94, "percentage": 97.15, "elapsed_time": "1 day, 23:37:41", "remaining_time": "1:23:42"} +{"current_steps": 27460, "total_steps": 28254, "loss": 0.6937, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.983267708554289e-08, "epoch": 1.94, "percentage": 97.19, "elapsed_time": "1 day, 23:38:47", "remaining_time": "1:22:39"} +{"current_steps": 27470, "total_steps": 28254, "loss": 0.6886, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.736632442446103e-08, "epoch": 1.94, "percentage": 97.23, "elapsed_time": "1 day, 23:39:50", "remaining_time": "1:21:37"} +{"current_steps": 27480, "total_steps": 28254, "loss": 0.7121, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.493076002008994e-08, "epoch": 1.95, "percentage": 97.26, "elapsed_time": "1 day, 23:40:52", "remaining_time": "1:20:34"} +{"current_steps": 27490, "total_steps": 28254, "loss": 0.7186, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.252598688362924e-08, "epoch": 1.95, "percentage": 97.3, "elapsed_time": "1 day, 23:41:55", "remaining_time": "1:19:32"} +{"current_steps": 27500, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.0152007988209e-08, "epoch": 1.95, "percentage": 97.33, "elapsed_time": "1 day, 23:42:57", "remaining_time": "1:18:29"} +{"current_steps": 27510, "total_steps": 28254, "loss": 0.716, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.780882626888421e-08, "epoch": 1.95, "percentage": 97.37, "elapsed_time": "1 day, 23:44:00", "remaining_time": "1:17:27"} +{"current_steps": 27520, "total_steps": 28254, "loss": 0.7589, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.572629670220156e-08, "epoch": 1.95, "percentage": 97.4, "elapsed_time": "1 day, 23:45:02", "remaining_time": "1:16:24"} +{"current_steps": 27530, "total_steps": 28254, "loss": 0.7078, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.34416375671776e-08, "epoch": 1.95, "percentage": 97.44, "elapsed_time": "1 day, 23:46:05", "remaining_time": "1:15:22"} +{"current_steps": 27540, "total_steps": 28254, "loss": 0.7089, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.118778390458492e-08, "epoch": 1.95, "percentage": 97.47, "elapsed_time": "1 day, 23:47:09", "remaining_time": "1:14:20"} +{"current_steps": 27550, "total_steps": 28254, "loss": 0.723, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.89647385009723e-08, "epoch": 1.95, "percentage": 97.51, "elapsed_time": "1 day, 23:48:11", "remaining_time": "1:13:17"} +{"current_steps": 27560, "total_steps": 28254, "loss": 0.7296, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.677250410478287e-08, "epoch": 1.95, "percentage": 97.54, "elapsed_time": "1 day, 23:49:13", "remaining_time": "1:12:15"} +{"current_steps": 27570, "total_steps": 28254, "loss": 0.7196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.46110834263819e-08, "epoch": 1.95, "percentage": 97.58, "elapsed_time": "1 day, 23:50:16", "remaining_time": "1:11:12"} +{"current_steps": 27580, "total_steps": 28254, "loss": 0.7115, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.248047913802902e-08, "epoch": 1.95, "percentage": 97.61, "elapsed_time": "1 day, 23:51:20", "remaining_time": "1:10:10"} +{"current_steps": 27590, "total_steps": 28254, "loss": 0.7485, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.038069387388658e-08, "epoch": 1.95, "percentage": 97.65, "elapsed_time": "1 day, 23:52:22", "remaining_time": "1:09:07"} +{"current_steps": 27600, "total_steps": 28254, "loss": 0.6943, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.83117302300168e-08, "epoch": 1.95, "percentage": 97.69, "elapsed_time": "1 day, 23:53:24", "remaining_time": "1:08:05"} +{"current_steps": 27610, "total_steps": 28254, "loss": 0.669, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.627359076437357e-08, "epoch": 1.95, "percentage": 97.72, "elapsed_time": "1 day, 23:54:25", "remaining_time": "1:07:02"} +{"current_steps": 27620, "total_steps": 28254, "loss": 0.7157, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.426627799679674e-08, "epoch": 1.96, "percentage": 97.76, "elapsed_time": "1 day, 23:55:25", "remaining_time": "1:06:00"} +{"current_steps": 27630, "total_steps": 28254, "loss": 0.6938, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.228979440902339e-08, "epoch": 1.96, "percentage": 97.79, "elapsed_time": "1 day, 23:56:28", "remaining_time": "1:04:57"} +{"current_steps": 27640, "total_steps": 28254, "loss": 0.7006, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.034414244466547e-08, "epoch": 1.96, "percentage": 97.83, "elapsed_time": "1 day, 23:57:31", "remaining_time": "1:03:55"} +{"current_steps": 27650, "total_steps": 28254, "loss": 0.6623, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.842932450922656e-08, "epoch": 1.96, "percentage": 97.86, "elapsed_time": "1 day, 23:58:35", "remaining_time": "1:02:52"} +{"current_steps": 27660, "total_steps": 28254, "loss": 0.6881, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.6545342970076855e-08, "epoch": 1.96, "percentage": 97.9, "elapsed_time": "1 day, 23:59:37", "remaining_time": "1:01:50"} +{"current_steps": 27670, "total_steps": 28254, "loss": 0.6943, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.4692200156472583e-08, "epoch": 1.96, "percentage": 97.93, "elapsed_time": "2 days, 0:00:41", "remaining_time": "1:00:47"} +{"current_steps": 27680, "total_steps": 28254, "loss": 0.7158, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.286989835953382e-08, "epoch": 1.96, "percentage": 97.97, "elapsed_time": "2 days, 0:01:43", "remaining_time": "0:59:45"} +{"current_steps": 27690, "total_steps": 28254, "loss": 0.7247, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.107843983226113e-08, "epoch": 1.96, "percentage": 98.0, "elapsed_time": "2 days, 0:02:48", "remaining_time": "0:58:43"} +{"current_steps": 27700, "total_steps": 28254, "loss": 0.7313, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.931782678951058e-08, "epoch": 1.96, "percentage": 98.04, "elapsed_time": "2 days, 0:03:48", "remaining_time": "0:57:40"} +{"current_steps": 27710, "total_steps": 28254, "loss": 0.7065, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.7588061408013195e-08, "epoch": 1.96, "percentage": 98.07, "elapsed_time": "2 days, 0:04:51", "remaining_time": "0:56:38"} +{"current_steps": 27720, "total_steps": 28254, "loss": 0.7159, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.588914582635273e-08, "epoch": 1.96, "percentage": 98.11, "elapsed_time": "2 days, 0:05:56", "remaining_time": "0:55:35"} +{"current_steps": 27730, "total_steps": 28254, "loss": 0.7211, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.422108214497678e-08, "epoch": 1.96, "percentage": 98.15, "elapsed_time": "2 days, 0:06:58", "remaining_time": "0:54:33"} +{"current_steps": 27740, "total_steps": 28254, "loss": 0.7073, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.2583872426185665e-08, "epoch": 1.96, "percentage": 98.18, "elapsed_time": "2 days, 0:08:01", "remaining_time": "0:53:30"} +{"current_steps": 27750, "total_steps": 28254, "loss": 0.7067, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.097751869414079e-08, "epoch": 1.96, "percentage": 98.22, "elapsed_time": "2 days, 0:09:05", "remaining_time": "0:52:28"} +{"current_steps": 27760, "total_steps": 28254, "loss": 0.7176, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.940202293484518e-08, "epoch": 1.96, "percentage": 98.25, "elapsed_time": "2 days, 0:10:08", "remaining_time": "0:51:25"} +{"current_steps": 27770, "total_steps": 28254, "loss": 0.7229, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.785738709616016e-08, "epoch": 1.97, "percentage": 98.29, "elapsed_time": "2 days, 0:11:10", "remaining_time": "0:50:23"} +{"current_steps": 27780, "total_steps": 28254, "loss": 0.6988, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.634361308778866e-08, "epoch": 1.97, "percentage": 98.32, "elapsed_time": "2 days, 0:12:11", "remaining_time": "0:49:20"} +{"current_steps": 27790, "total_steps": 28254, "loss": 0.7275, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.486070278127529e-08, "epoch": 1.97, "percentage": 98.36, "elapsed_time": "2 days, 0:13:14", "remaining_time": "0:48:18"} +{"current_steps": 27800, "total_steps": 28254, "loss": 0.6845, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.3408658010011805e-08, "epoch": 1.97, "percentage": 98.39, "elapsed_time": "2 days, 0:14:14", "remaining_time": "0:47:15"} +{"current_steps": 27810, "total_steps": 28254, "loss": 0.733, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.198748056922607e-08, "epoch": 1.97, "percentage": 98.43, "elapsed_time": "2 days, 0:15:16", "remaining_time": "0:46:13"} +{"current_steps": 27820, "total_steps": 28254, "loss": 0.7463, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.0597172215982014e-08, "epoch": 1.97, "percentage": 98.46, "elapsed_time": "2 days, 0:16:19", "remaining_time": "0:45:11"} +{"current_steps": 27830, "total_steps": 28254, "loss": 0.725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.9237734669185223e-08, "epoch": 1.97, "percentage": 98.5, "elapsed_time": "2 days, 0:17:24", "remaining_time": "0:44:08"} +{"current_steps": 27840, "total_steps": 28254, "loss": 0.7233, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.7909169609571796e-08, "epoch": 1.97, "percentage": 98.53, "elapsed_time": "2 days, 0:18:28", "remaining_time": "0:43:06"} +{"current_steps": 27850, "total_steps": 28254, "loss": 0.7196, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.6611478679700043e-08, "epoch": 1.97, "percentage": 98.57, "elapsed_time": "2 days, 0:19:32", "remaining_time": "0:42:03"} +{"current_steps": 27860, "total_steps": 28254, "loss": 0.7174, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.5344663483967135e-08, "epoch": 1.97, "percentage": 98.61, "elapsed_time": "2 days, 0:20:34", "remaining_time": "0:41:01"} +{"current_steps": 27870, "total_steps": 28254, "loss": 0.6863, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4108725588595227e-08, "epoch": 1.97, "percentage": 98.64, "elapsed_time": "2 days, 0:21:37", "remaining_time": "0:39:58"} +{"current_steps": 27880, "total_steps": 28254, "loss": 0.683, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.290366652163145e-08, "epoch": 1.97, "percentage": 98.68, "elapsed_time": "2 days, 0:22:40", "remaining_time": "0:38:56"} +{"current_steps": 27890, "total_steps": 28254, "loss": 0.7095, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.1729487772945145e-08, "epoch": 1.97, "percentage": 98.71, "elapsed_time": "2 days, 0:23:42", "remaining_time": "0:37:53"} +{"current_steps": 27900, "total_steps": 28254, "loss": 0.6825, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.0586190794222303e-08, "epoch": 1.97, "percentage": 98.75, "elapsed_time": "2 days, 0:24:42", "remaining_time": "0:36:51"} +{"current_steps": 27910, "total_steps": 28254, "loss": 0.7083, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.947377699897668e-08, "epoch": 1.98, "percentage": 98.78, "elapsed_time": "2 days, 0:25:45", "remaining_time": "0:35:48"} +{"current_steps": 27920, "total_steps": 28254, "loss": 0.7009, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8392247762535898e-08, "epoch": 1.98, "percentage": 98.82, "elapsed_time": "2 days, 0:26:47", "remaining_time": "0:34:46"} +{"current_steps": 27930, "total_steps": 28254, "loss": 0.6894, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.734160442204147e-08, "epoch": 1.98, "percentage": 98.85, "elapsed_time": "2 days, 0:27:48", "remaining_time": "0:33:43"} +{"current_steps": 27940, "total_steps": 28254, "loss": 0.731, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.6321848276454332e-08, "epoch": 1.98, "percentage": 98.89, "elapsed_time": "2 days, 0:28:49", "remaining_time": "0:32:41"} +{"current_steps": 27950, "total_steps": 28254, "loss": 0.6969, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.5332980586543756e-08, "epoch": 1.98, "percentage": 98.92, "elapsed_time": "2 days, 0:29:51", "remaining_time": "0:31:38"} +{"current_steps": 27960, "total_steps": 28254, "loss": 0.7128, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.4375002574890106e-08, "epoch": 1.98, "percentage": 98.96, "elapsed_time": "2 days, 0:30:52", "remaining_time": "0:30:36"} +{"current_steps": 27970, "total_steps": 28254, "loss": 0.6867, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3447915425890411e-08, "epoch": 1.98, "percentage": 98.99, "elapsed_time": "2 days, 0:31:55", "remaining_time": "0:29:34"} +{"current_steps": 27980, "total_steps": 28254, "loss": 0.7137, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.2551720285738922e-08, "epoch": 1.98, "percentage": 99.03, "elapsed_time": "2 days, 0:32:57", "remaining_time": "0:28:31"} +{"current_steps": 27990, "total_steps": 28254, "loss": 0.6866, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1686418262443766e-08, "epoch": 1.98, "percentage": 99.07, "elapsed_time": "2 days, 0:33:59", "remaining_time": "0:27:29"} +{"current_steps": 28000, "total_steps": 28254, "loss": 0.7063, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0852010425818627e-08, "epoch": 1.98, "percentage": 99.1, "elapsed_time": "2 days, 0:35:00", "remaining_time": "0:26:26"} +{"current_steps": 28010, "total_steps": 28254, "loss": 0.6913, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.0048497807479963e-08, "epoch": 1.98, "percentage": 99.14, "elapsed_time": "2 days, 0:36:00", "remaining_time": "0:25:24"} +{"current_steps": 28020, "total_steps": 28254, "loss": 0.7079, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.275881400844232e-09, "epoch": 1.98, "percentage": 99.17, "elapsed_time": "2 days, 0:37:01", "remaining_time": "0:24:21"} +{"current_steps": 28030, "total_steps": 28254, "loss": 0.7065, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.534162161136228e-09, "epoch": 1.98, "percentage": 99.21, "elapsed_time": "2 days, 0:38:02", "remaining_time": "0:23:19"} +{"current_steps": 28040, "total_steps": 28254, "loss": 0.6902, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.823341005372409e-09, "epoch": 1.98, "percentage": 99.24, "elapsed_time": "2 days, 0:39:04", "remaining_time": "0:22:16"} +{"current_steps": 28050, "total_steps": 28254, "loss": 0.7062, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 7.143418812377567e-09, "epoch": 1.99, "percentage": 99.28, "elapsed_time": "2 days, 0:40:05", "remaining_time": "0:21:14"} +{"current_steps": 28060, "total_steps": 28254, "loss": 0.6914, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.494396422768168e-09, "epoch": 1.99, "percentage": 99.31, "elapsed_time": "2 days, 0:41:06", "remaining_time": "0:20:11"} +{"current_steps": 28070, "total_steps": 28254, "loss": 0.698, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.876274638960677e-09, "epoch": 1.99, "percentage": 99.35, "elapsed_time": "2 days, 0:42:08", "remaining_time": "0:19:09"} +{"current_steps": 28080, "total_steps": 28254, "loss": 0.7074, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 5.289054225166012e-09, "epoch": 1.99, "percentage": 99.38, "elapsed_time": "2 days, 0:43:10", "remaining_time": "0:18:06"} +{"current_steps": 28090, "total_steps": 28254, "loss": 0.6969, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.732735907392316e-09, "epoch": 1.99, "percentage": 99.42, "elapsed_time": "2 days, 0:44:14", "remaining_time": "0:17:04"} +{"current_steps": 28100, "total_steps": 28254, "loss": 0.7052, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.207320373442181e-09, "epoch": 1.99, "percentage": 99.45, "elapsed_time": "2 days, 0:45:19", "remaining_time": "0:16:01"} +{"current_steps": 28110, "total_steps": 28254, "loss": 0.7215, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.7128082729098734e-09, "epoch": 1.99, "percentage": 99.49, "elapsed_time": "2 days, 0:46:22", "remaining_time": "0:14:59"} +{"current_steps": 28120, "total_steps": 28254, "loss": 0.7025, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.2492002171813362e-09, "epoch": 1.99, "percentage": 99.53, "elapsed_time": "2 days, 0:47:25", "remaining_time": "0:13:57"} +{"current_steps": 28130, "total_steps": 28254, "loss": 0.7059, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.8164967794397366e-09, "epoch": 1.99, "percentage": 99.56, "elapsed_time": "2 days, 0:48:26", "remaining_time": "0:12:54"} +{"current_steps": 28140, "total_steps": 28254, "loss": 0.7147, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.4146984946515907e-09, "epoch": 1.99, "percentage": 99.6, "elapsed_time": "2 days, 0:49:28", "remaining_time": "0:11:52"} +{"current_steps": 28150, "total_steps": 28254, "loss": 0.7081, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 2.043805859583414e-09, "epoch": 1.99, "percentage": 99.63, "elapsed_time": "2 days, 0:50:31", "remaining_time": "0:10:49"} +{"current_steps": 28160, "total_steps": 28254, "loss": 0.7071, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.7038193327822972e-09, "epoch": 1.99, "percentage": 99.67, "elapsed_time": "2 days, 0:51:35", "remaining_time": "0:09:47"} +{"current_steps": 28170, "total_steps": 28254, "loss": 0.7082, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.3947393345925541e-09, "epoch": 1.99, "percentage": 99.7, "elapsed_time": "2 days, 0:52:37", "remaining_time": "0:08:44"} +{"current_steps": 28180, "total_steps": 28254, "loss": 0.6879, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.1165662471390724e-09, "epoch": 1.99, "percentage": 99.74, "elapsed_time": "2 days, 0:53:39", "remaining_time": "0:07:42"} +{"current_steps": 28190, "total_steps": 28254, "loss": 0.6987, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 8.693004143467409e-10, "epoch": 2.0, "percentage": 99.77, "elapsed_time": "2 days, 0:54:42", "remaining_time": "0:06:39"} +{"current_steps": 28200, "total_steps": 28254, "loss": 0.725, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 6.529421419154691e-10, "epoch": 2.0, "percentage": 99.81, "elapsed_time": "2 days, 0:55:45", "remaining_time": "0:05:37"} +{"current_steps": 28210, "total_steps": 28254, "loss": 0.7408, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 4.674916973396171e-10, "epoch": 2.0, "percentage": 99.84, "elapsed_time": "2 days, 0:56:50", "remaining_time": "0:04:34"} +{"current_steps": 28220, "total_steps": 28254, "loss": 0.6983, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.129493099024439e-10, "epoch": 2.0, "percentage": 99.88, "elapsed_time": "2 days, 0:57:52", "remaining_time": "0:03:32"} +{"current_steps": 28230, "total_steps": 28254, "loss": 0.7114, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 1.8931517067333204e-10, "epoch": 2.0, "percentage": 99.92, "elapsed_time": "2 days, 0:58:55", "remaining_time": "0:02:29"} +{"current_steps": 28240, "total_steps": 28254, "loss": 0.7046, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 9.658943250223651e-11, "epoch": 2.0, "percentage": 99.95, "elapsed_time": "2 days, 0:59:58", "remaining_time": "0:01:27"} +{"current_steps": 28250, "total_steps": 28254, "loss": 0.7057, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": 3.4772210030786786e-11, "epoch": 2.0, "percentage": 99.99, "elapsed_time": "2 days, 1:01:01", "remaining_time": "0:00:24"} +{"current_steps": 28254, "total_steps": 28254, "loss": null, "eval_loss": null, "predict_loss": null, "reward": null, "learning_rate": null, "epoch": 2.0, "percentage": 100.0, "elapsed_time": "2 days, 1:01:27", "remaining_time": "0:00:00"} diff --git a/trainer_state.json b/trainer_state.json new file mode 100644 index 0000000000000000000000000000000000000000..21f035ece888070d0059772c45ce5d5ed6abaa84 --- /dev/null +++ b/trainer_state.json @@ -0,0 +1,16975 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.9998938259807826, + "global_step": 28254, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "learning_rate": 4.999998454568244e-05, + "loss": 1.3539, + "step": 10 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999938182748876e-05, + "loss": 1.1833, + "step": 20 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999870029288556e-05, + "loss": 1.173, + "step": 30 + }, + { + "epoch": 0.0, + "learning_rate": 4.999976494017406e-05, + "loss": 1.0772, + "step": 40 + }, + { + "epoch": 0.0, + "learning_rate": 4.999962894271507e-05, + "loss": 1.0715, + "step": 50 + }, + { + "epoch": 0.0, + "learning_rate": 4.9999462037079705e-05, + "loss": 1.0268, + "step": 60 + }, + { + "epoch": 0.0, + "learning_rate": 4.999926422347434e-05, + "loss": 0.9807, + "step": 70 + }, + { + "epoch": 0.01, + "learning_rate": 4.999903550214352e-05, + "loss": 0.9862, + "step": 80 + }, + { + "epoch": 0.01, + "learning_rate": 4.999877587337004e-05, + "loss": 0.9725, + "step": 90 + }, + { + "epoch": 0.01, + "learning_rate": 4.999848533747488e-05, + "loss": 0.9993, + "step": 100 + }, + { + "epoch": 0.01, + "learning_rate": 4.999816389481725e-05, + "loss": 0.9596, + "step": 110 + }, + { + "epoch": 0.01, + "learning_rate": 4.999781154579456e-05, + "loss": 0.979, + "step": 120 + }, + { + "epoch": 0.01, + "learning_rate": 4.9997428290842444e-05, + "loss": 0.9748, + "step": 130 + }, + { + "epoch": 0.01, + "learning_rate": 4.999701413043471e-05, + "loss": 0.9309, + "step": 140 + }, + { + "epoch": 0.01, + "learning_rate": 4.999656906508344e-05, + "loss": 0.9143, + "step": 150 + }, + { + "epoch": 0.01, + "learning_rate": 4.999609309533887e-05, + "loss": 0.9439, + "step": 160 + }, + { + "epoch": 0.01, + "learning_rate": 4.999558622178947e-05, + "loss": 0.9286, + "step": 170 + }, + { + "epoch": 0.01, + "learning_rate": 4.99950484450619e-05, + "loss": 0.9544, + "step": 180 + }, + { + "epoch": 0.01, + "learning_rate": 4.999447976582104e-05, + "loss": 0.9355, + "step": 190 + }, + { + "epoch": 0.01, + "learning_rate": 4.999388018476998e-05, + "loss": 0.9154, + "step": 200 + }, + { + "epoch": 0.01, + "learning_rate": 4.999324970265001e-05, + "loss": 0.9326, + "step": 210 + }, + { + "epoch": 0.02, + "learning_rate": 4.999258832024061e-05, + "loss": 0.9215, + "step": 220 + }, + { + "epoch": 0.02, + "learning_rate": 4.99918960383595e-05, + "loss": 0.9281, + "step": 230 + }, + { + "epoch": 0.02, + "learning_rate": 4.9991172857862555e-05, + "loss": 0.935, + "step": 240 + }, + { + "epoch": 0.02, + "learning_rate": 4.99904187796439e-05, + "loss": 0.941, + "step": 250 + }, + { + "epoch": 0.02, + "learning_rate": 4.9989633804635814e-05, + "loss": 0.9377, + "step": 260 + }, + { + "epoch": 0.02, + "learning_rate": 4.9988817933808814e-05, + "loss": 0.9014, + "step": 270 + }, + { + "epoch": 0.02, + "learning_rate": 4.9987971168171585e-05, + "loss": 0.9323, + "step": 280 + }, + { + "epoch": 0.02, + "learning_rate": 4.998709350877103e-05, + "loss": 0.8987, + "step": 290 + }, + { + "epoch": 0.02, + "learning_rate": 4.998618495669224e-05, + "loss": 0.8933, + "step": 300 + }, + { + "epoch": 0.02, + "learning_rate": 4.9985245513058495e-05, + "loss": 0.893, + "step": 310 + }, + { + "epoch": 0.02, + "learning_rate": 4.9984275179031276e-05, + "loss": 0.909, + "step": 320 + }, + { + "epoch": 0.02, + "learning_rate": 4.998327395581025e-05, + "loss": 0.9235, + "step": 330 + }, + { + "epoch": 0.02, + "learning_rate": 4.9982241844633265e-05, + "loss": 0.8945, + "step": 340 + }, + { + "epoch": 0.02, + "learning_rate": 4.998117884677638e-05, + "loss": 0.9095, + "step": 350 + }, + { + "epoch": 0.03, + "learning_rate": 4.998008496355382e-05, + "loss": 0.8919, + "step": 360 + }, + { + "epoch": 0.03, + "learning_rate": 4.9978960196318006e-05, + "loss": 0.9088, + "step": 370 + }, + { + "epoch": 0.03, + "learning_rate": 4.997780454645954e-05, + "loss": 0.8985, + "step": 380 + }, + { + "epoch": 0.03, + "learning_rate": 4.99766180154072e-05, + "loss": 0.8972, + "step": 390 + }, + { + "epoch": 0.03, + "learning_rate": 4.9975400604627957e-05, + "loss": 0.8983, + "step": 400 + }, + { + "epoch": 0.03, + "learning_rate": 4.9974152315626935e-05, + "loss": 0.9115, + "step": 410 + }, + { + "epoch": 0.03, + "learning_rate": 4.997287314994746e-05, + "loss": 0.8957, + "step": 420 + }, + { + "epoch": 0.03, + "learning_rate": 4.997156310917103e-05, + "loss": 0.8681, + "step": 430 + }, + { + "epoch": 0.03, + "learning_rate": 4.9970222194917296e-05, + "loss": 0.894, + "step": 440 + }, + { + "epoch": 0.03, + "learning_rate": 4.996885040884409e-05, + "loss": 0.8798, + "step": 450 + }, + { + "epoch": 0.03, + "learning_rate": 4.996744775264743e-05, + "loss": 0.9034, + "step": 460 + }, + { + "epoch": 0.03, + "learning_rate": 4.996601422806147e-05, + "loss": 0.9033, + "step": 470 + }, + { + "epoch": 0.03, + "learning_rate": 4.9964549836858536e-05, + "loss": 0.8841, + "step": 480 + }, + { + "epoch": 0.03, + "learning_rate": 4.9963054580849134e-05, + "loss": 0.8877, + "step": 490 + }, + { + "epoch": 0.04, + "learning_rate": 4.996152846188191e-05, + "loss": 0.8729, + "step": 500 + }, + { + "epoch": 0.04, + "learning_rate": 4.995997148184369e-05, + "loss": 0.8853, + "step": 510 + }, + { + "epoch": 0.04, + "learning_rate": 4.9958383642659414e-05, + "loss": 0.8837, + "step": 520 + }, + { + "epoch": 0.04, + "learning_rate": 4.995676494629221e-05, + "loss": 0.8833, + "step": 530 + }, + { + "epoch": 0.04, + "learning_rate": 4.9955115394743354e-05, + "loss": 0.8843, + "step": 540 + }, + { + "epoch": 0.04, + "learning_rate": 4.995343499005225e-05, + "loss": 0.892, + "step": 550 + }, + { + "epoch": 0.04, + "learning_rate": 4.995172373429646e-05, + "loss": 0.8575, + "step": 560 + }, + { + "epoch": 0.04, + "learning_rate": 4.9949981629591705e-05, + "loss": 0.8311, + "step": 570 + }, + { + "epoch": 0.04, + "learning_rate": 4.99482086780918e-05, + "loss": 0.8669, + "step": 580 + }, + { + "epoch": 0.04, + "learning_rate": 4.994640488198874e-05, + "loss": 0.8388, + "step": 590 + }, + { + "epoch": 0.04, + "learning_rate": 4.994457024351264e-05, + "loss": 0.8424, + "step": 600 + }, + { + "epoch": 0.04, + "learning_rate": 4.994270476493175e-05, + "loss": 0.8676, + "step": 610 + }, + { + "epoch": 0.04, + "learning_rate": 4.994080844855243e-05, + "loss": 0.8598, + "step": 620 + }, + { + "epoch": 0.04, + "learning_rate": 4.993888129671921e-05, + "loss": 0.824, + "step": 630 + }, + { + "epoch": 0.05, + "learning_rate": 4.993692331181469e-05, + "loss": 0.8652, + "step": 640 + }, + { + "epoch": 0.05, + "learning_rate": 4.993493449625963e-05, + "loss": 0.8533, + "step": 650 + }, + { + "epoch": 0.05, + "learning_rate": 4.993291485251288e-05, + "loss": 0.8677, + "step": 660 + }, + { + "epoch": 0.05, + "learning_rate": 4.993086438307143e-05, + "loss": 0.8459, + "step": 670 + }, + { + "epoch": 0.05, + "learning_rate": 4.9928783090470365e-05, + "loss": 0.8626, + "step": 680 + }, + { + "epoch": 0.05, + "learning_rate": 4.992667097728287e-05, + "loss": 0.8127, + "step": 690 + }, + { + "epoch": 0.05, + "learning_rate": 4.992452804612027e-05, + "loss": 0.8716, + "step": 700 + }, + { + "epoch": 0.05, + "learning_rate": 4.992235429963195e-05, + "loss": 0.8544, + "step": 710 + }, + { + "epoch": 0.05, + "learning_rate": 4.992014974050542e-05, + "loss": 0.8562, + "step": 720 + }, + { + "epoch": 0.05, + "learning_rate": 4.991791437146627e-05, + "loss": 0.871, + "step": 730 + }, + { + "epoch": 0.05, + "learning_rate": 4.9915648195278186e-05, + "loss": 0.8453, + "step": 740 + }, + { + "epoch": 0.05, + "learning_rate": 4.9913351214742945e-05, + "loss": 0.8524, + "step": 750 + }, + { + "epoch": 0.05, + "learning_rate": 4.991102343270042e-05, + "loss": 0.8581, + "step": 760 + }, + { + "epoch": 0.05, + "learning_rate": 4.9908664852028545e-05, + "loss": 0.8477, + "step": 770 + }, + { + "epoch": 0.06, + "learning_rate": 4.990627547564335e-05, + "loss": 0.8651, + "step": 780 + }, + { + "epoch": 0.06, + "learning_rate": 4.990385530649891e-05, + "loss": 0.8453, + "step": 790 + }, + { + "epoch": 0.06, + "learning_rate": 4.9901404347587404e-05, + "loss": 0.8586, + "step": 800 + }, + { + "epoch": 0.06, + "learning_rate": 4.9898922601939056e-05, + "loss": 0.8746, + "step": 810 + }, + { + "epoch": 0.06, + "learning_rate": 4.989641007262218e-05, + "loss": 0.8652, + "step": 820 + }, + { + "epoch": 0.06, + "learning_rate": 4.98938667627431e-05, + "loss": 0.8531, + "step": 830 + }, + { + "epoch": 0.06, + "learning_rate": 4.989129267544626e-05, + "loss": 0.8686, + "step": 840 + }, + { + "epoch": 0.06, + "learning_rate": 4.988868781391408e-05, + "loss": 0.8692, + "step": 850 + }, + { + "epoch": 0.06, + "learning_rate": 4.988605218136711e-05, + "loss": 0.8274, + "step": 860 + }, + { + "epoch": 0.06, + "learning_rate": 4.9883385781063876e-05, + "loss": 0.8502, + "step": 870 + }, + { + "epoch": 0.06, + "learning_rate": 4.9880688616300975e-05, + "loss": 0.8445, + "step": 880 + }, + { + "epoch": 0.06, + "learning_rate": 4.9877960690413035e-05, + "loss": 0.8475, + "step": 890 + }, + { + "epoch": 0.06, + "learning_rate": 4.987520200677271e-05, + "loss": 0.8215, + "step": 900 + }, + { + "epoch": 0.06, + "learning_rate": 4.987241256879071e-05, + "loss": 0.8389, + "step": 910 + }, + { + "epoch": 0.07, + "learning_rate": 4.986959237991571e-05, + "loss": 0.8422, + "step": 920 + }, + { + "epoch": 0.07, + "learning_rate": 4.9866741443634455e-05, + "loss": 0.8287, + "step": 930 + }, + { + "epoch": 0.07, + "learning_rate": 4.986385976347169e-05, + "loss": 0.8694, + "step": 940 + }, + { + "epoch": 0.07, + "learning_rate": 4.986094734299016e-05, + "loss": 0.847, + "step": 950 + }, + { + "epoch": 0.07, + "learning_rate": 4.985800418579063e-05, + "loss": 0.8191, + "step": 960 + }, + { + "epoch": 0.07, + "learning_rate": 4.985503029551184e-05, + "loss": 0.8419, + "step": 970 + }, + { + "epoch": 0.07, + "learning_rate": 4.985202567583057e-05, + "loss": 0.8517, + "step": 980 + }, + { + "epoch": 0.07, + "learning_rate": 4.984899033046155e-05, + "loss": 0.8653, + "step": 990 + }, + { + "epoch": 0.07, + "learning_rate": 4.9845924263157526e-05, + "loss": 0.8349, + "step": 1000 + }, + { + "epoch": 0.07, + "learning_rate": 4.984282747770922e-05, + "loss": 0.8536, + "step": 1010 + }, + { + "epoch": 0.07, + "learning_rate": 4.983969997794531e-05, + "loss": 0.8882, + "step": 1020 + }, + { + "epoch": 0.07, + "learning_rate": 4.983654176773248e-05, + "loss": 0.8285, + "step": 1030 + }, + { + "epoch": 0.07, + "learning_rate": 4.983335285097537e-05, + "loss": 0.8503, + "step": 1040 + }, + { + "epoch": 0.07, + "learning_rate": 4.983013323161657e-05, + "loss": 0.8171, + "step": 1050 + }, + { + "epoch": 0.08, + "learning_rate": 4.982688291363666e-05, + "loss": 0.8398, + "step": 1060 + }, + { + "epoch": 0.08, + "learning_rate": 4.982360190105414e-05, + "loss": 0.8222, + "step": 1070 + }, + { + "epoch": 0.08, + "learning_rate": 4.982029019792548e-05, + "loss": 0.8333, + "step": 1080 + }, + { + "epoch": 0.08, + "learning_rate": 4.981694780834508e-05, + "loss": 0.8437, + "step": 1090 + }, + { + "epoch": 0.08, + "learning_rate": 4.981357473644531e-05, + "loss": 0.827, + "step": 1100 + }, + { + "epoch": 0.08, + "learning_rate": 4.9810170986396434e-05, + "loss": 0.8216, + "step": 1110 + }, + { + "epoch": 0.08, + "learning_rate": 4.980673656240667e-05, + "loss": 0.8253, + "step": 1120 + }, + { + "epoch": 0.08, + "learning_rate": 4.9803271468722146e-05, + "loss": 0.8195, + "step": 1130 + }, + { + "epoch": 0.08, + "learning_rate": 4.9799775709626926e-05, + "loss": 0.8394, + "step": 1140 + }, + { + "epoch": 0.08, + "learning_rate": 4.9796249289442966e-05, + "loss": 0.8348, + "step": 1150 + }, + { + "epoch": 0.08, + "learning_rate": 4.9792692212530134e-05, + "loss": 0.859, + "step": 1160 + }, + { + "epoch": 0.08, + "learning_rate": 4.978910448328622e-05, + "loss": 0.8043, + "step": 1170 + }, + { + "epoch": 0.08, + "learning_rate": 4.97854861061469e-05, + "loss": 0.8433, + "step": 1180 + }, + { + "epoch": 0.08, + "learning_rate": 4.978183708558571e-05, + "loss": 0.8244, + "step": 1190 + }, + { + "epoch": 0.08, + "learning_rate": 4.977815742611413e-05, + "loss": 0.8379, + "step": 1200 + }, + { + "epoch": 0.09, + "learning_rate": 4.977444713228147e-05, + "loss": 0.8471, + "step": 1210 + }, + { + "epoch": 0.09, + "learning_rate": 4.9770706208674946e-05, + "loss": 0.808, + "step": 1220 + }, + { + "epoch": 0.09, + "learning_rate": 4.976693465991963e-05, + "loss": 0.8384, + "step": 1230 + }, + { + "epoch": 0.09, + "learning_rate": 4.9763132490678453e-05, + "loss": 0.856, + "step": 1240 + }, + { + "epoch": 0.09, + "learning_rate": 4.975929970565222e-05, + "loss": 0.8382, + "step": 1250 + }, + { + "epoch": 0.09, + "learning_rate": 4.975543630957957e-05, + "loss": 0.8219, + "step": 1260 + }, + { + "epoch": 0.09, + "learning_rate": 4.975154230723699e-05, + "loss": 0.8384, + "step": 1270 + }, + { + "epoch": 0.09, + "learning_rate": 4.9747617703438824e-05, + "loss": 0.8276, + "step": 1280 + }, + { + "epoch": 0.09, + "learning_rate": 4.974366250303723e-05, + "loss": 0.8604, + "step": 1290 + }, + { + "epoch": 0.09, + "learning_rate": 4.97396767109222e-05, + "loss": 0.8471, + "step": 1300 + }, + { + "epoch": 0.09, + "learning_rate": 4.973566033202156e-05, + "loss": 0.8199, + "step": 1310 + }, + { + "epoch": 0.09, + "learning_rate": 4.973161337130094e-05, + "loss": 0.8243, + "step": 1320 + }, + { + "epoch": 0.09, + "learning_rate": 4.972753583376376e-05, + "loss": 0.7936, + "step": 1330 + }, + { + "epoch": 0.09, + "learning_rate": 4.972342772445129e-05, + "loss": 0.8231, + "step": 1340 + }, + { + "epoch": 0.1, + "learning_rate": 4.9719289048442566e-05, + "loss": 0.8223, + "step": 1350 + }, + { + "epoch": 0.1, + "learning_rate": 4.971511981085441e-05, + "loss": 0.8174, + "step": 1360 + }, + { + "epoch": 0.1, + "learning_rate": 4.9710920016841455e-05, + "loss": 0.8088, + "step": 1370 + }, + { + "epoch": 0.1, + "learning_rate": 4.9706689671596086e-05, + "loss": 0.8149, + "step": 1380 + }, + { + "epoch": 0.1, + "learning_rate": 4.970242878034847e-05, + "loss": 0.8522, + "step": 1390 + }, + { + "epoch": 0.1, + "learning_rate": 4.969813734836656e-05, + "loss": 0.8404, + "step": 1400 + }, + { + "epoch": 0.1, + "learning_rate": 4.969381538095602e-05, + "loss": 0.8608, + "step": 1410 + }, + { + "epoch": 0.1, + "learning_rate": 4.968946288346031e-05, + "loss": 0.8232, + "step": 1420 + }, + { + "epoch": 0.1, + "learning_rate": 4.968507986126063e-05, + "loss": 0.8368, + "step": 1430 + }, + { + "epoch": 0.1, + "learning_rate": 4.9680666319775884e-05, + "loss": 0.8154, + "step": 1440 + }, + { + "epoch": 0.1, + "learning_rate": 4.967622226446276e-05, + "loss": 0.8379, + "step": 1450 + }, + { + "epoch": 0.1, + "learning_rate": 4.9671747700815615e-05, + "loss": 0.8333, + "step": 1460 + }, + { + "epoch": 0.1, + "learning_rate": 4.966724263436658e-05, + "loss": 0.8542, + "step": 1470 + }, + { + "epoch": 0.1, + "learning_rate": 4.9662707070685476e-05, + "loss": 0.8421, + "step": 1480 + }, + { + "epoch": 0.11, + "learning_rate": 4.9658141015379805e-05, + "loss": 0.7827, + "step": 1490 + }, + { + "epoch": 0.11, + "learning_rate": 4.9653544474094805e-05, + "loss": 0.8659, + "step": 1500 + }, + { + "epoch": 0.11, + "learning_rate": 4.9648917452513384e-05, + "loss": 0.8166, + "step": 1510 + }, + { + "epoch": 0.11, + "learning_rate": 4.964425995635613e-05, + "loss": 0.8221, + "step": 1520 + }, + { + "epoch": 0.11, + "learning_rate": 4.963957199138134e-05, + "loss": 0.8129, + "step": 1530 + }, + { + "epoch": 0.11, + "learning_rate": 4.963485356338493e-05, + "loss": 0.8171, + "step": 1540 + }, + { + "epoch": 0.11, + "learning_rate": 4.9630104678200526e-05, + "loss": 0.7984, + "step": 1550 + }, + { + "epoch": 0.11, + "learning_rate": 4.962532534169939e-05, + "loss": 0.8109, + "step": 1560 + }, + { + "epoch": 0.11, + "learning_rate": 4.962051555979042e-05, + "loss": 0.8164, + "step": 1570 + }, + { + "epoch": 0.11, + "learning_rate": 4.9615675338420174e-05, + "loss": 0.8063, + "step": 1580 + }, + { + "epoch": 0.11, + "learning_rate": 4.961080468357284e-05, + "loss": 0.8123, + "step": 1590 + }, + { + "epoch": 0.11, + "learning_rate": 4.9605903601270234e-05, + "loss": 0.8322, + "step": 1600 + }, + { + "epoch": 0.11, + "learning_rate": 4.960097209757178e-05, + "loss": 0.8256, + "step": 1610 + }, + { + "epoch": 0.11, + "learning_rate": 4.959601017857451e-05, + "loss": 0.8113, + "step": 1620 + }, + { + "epoch": 0.12, + "learning_rate": 4.959101785041309e-05, + "loss": 0.8323, + "step": 1630 + }, + { + "epoch": 0.12, + "learning_rate": 4.958599511925975e-05, + "loss": 0.7911, + "step": 1640 + }, + { + "epoch": 0.12, + "learning_rate": 4.958094199132432e-05, + "loss": 0.8175, + "step": 1650 + }, + { + "epoch": 0.12, + "learning_rate": 4.957585847285422e-05, + "loss": 0.8114, + "step": 1660 + }, + { + "epoch": 0.12, + "learning_rate": 4.957074457013442e-05, + "loss": 0.7619, + "step": 1670 + }, + { + "epoch": 0.12, + "learning_rate": 4.956560028948749e-05, + "loss": 0.7909, + "step": 1680 + }, + { + "epoch": 0.12, + "learning_rate": 4.956042563727352e-05, + "loss": 0.8274, + "step": 1690 + }, + { + "epoch": 0.12, + "learning_rate": 4.955522061989018e-05, + "loss": 0.8251, + "step": 1700 + }, + { + "epoch": 0.12, + "learning_rate": 4.9549985243772664e-05, + "loss": 0.8129, + "step": 1710 + }, + { + "epoch": 0.12, + "learning_rate": 4.95447195153937e-05, + "loss": 0.8211, + "step": 1720 + }, + { + "epoch": 0.12, + "learning_rate": 4.9539423441263554e-05, + "loss": 0.8131, + "step": 1730 + }, + { + "epoch": 0.12, + "learning_rate": 4.9534097027930006e-05, + "loss": 0.7954, + "step": 1740 + }, + { + "epoch": 0.12, + "learning_rate": 4.952874028197833e-05, + "loss": 0.829, + "step": 1750 + }, + { + "epoch": 0.12, + "learning_rate": 4.9523353210031325e-05, + "loss": 0.8021, + "step": 1760 + }, + { + "epoch": 0.13, + "learning_rate": 4.9517935818749275e-05, + "loss": 0.8026, + "step": 1770 + }, + { + "epoch": 0.13, + "learning_rate": 4.951248811482993e-05, + "loss": 0.8616, + "step": 1780 + }, + { + "epoch": 0.13, + "learning_rate": 4.950701010500856e-05, + "loss": 0.8444, + "step": 1790 + }, + { + "epoch": 0.13, + "learning_rate": 4.950150179605785e-05, + "loss": 0.8206, + "step": 1800 + }, + { + "epoch": 0.13, + "learning_rate": 4.9495963194787986e-05, + "loss": 0.7956, + "step": 1810 + }, + { + "epoch": 0.13, + "learning_rate": 4.94903943080466e-05, + "loss": 0.7983, + "step": 1820 + }, + { + "epoch": 0.13, + "learning_rate": 4.948479514271874e-05, + "loss": 0.8392, + "step": 1830 + }, + { + "epoch": 0.13, + "learning_rate": 4.947916570572693e-05, + "loss": 0.8538, + "step": 1840 + }, + { + "epoch": 0.13, + "learning_rate": 4.947350600403108e-05, + "loss": 0.7881, + "step": 1850 + }, + { + "epoch": 0.13, + "learning_rate": 4.946781604462854e-05, + "loss": 0.8101, + "step": 1860 + }, + { + "epoch": 0.13, + "learning_rate": 4.946209583455407e-05, + "loss": 0.8344, + "step": 1870 + }, + { + "epoch": 0.13, + "learning_rate": 4.945634538087983e-05, + "loss": 0.8239, + "step": 1880 + }, + { + "epoch": 0.13, + "learning_rate": 4.945056469071536e-05, + "loss": 0.8351, + "step": 1890 + }, + { + "epoch": 0.13, + "learning_rate": 4.94447537712076e-05, + "loss": 0.7967, + "step": 1900 + }, + { + "epoch": 0.14, + "learning_rate": 4.943891262954083e-05, + "loss": 0.797, + "step": 1910 + }, + { + "epoch": 0.14, + "learning_rate": 4.9433041272936734e-05, + "loss": 0.8146, + "step": 1920 + }, + { + "epoch": 0.14, + "learning_rate": 4.942713970865435e-05, + "loss": 0.8237, + "step": 1930 + }, + { + "epoch": 0.14, + "learning_rate": 4.942120794399002e-05, + "loss": 0.7953, + "step": 1940 + }, + { + "epoch": 0.14, + "learning_rate": 4.9415245986277483e-05, + "loss": 0.8066, + "step": 1950 + }, + { + "epoch": 0.14, + "learning_rate": 4.940925384288775e-05, + "loss": 0.8232, + "step": 1960 + }, + { + "epoch": 0.14, + "learning_rate": 4.940323152122921e-05, + "loss": 0.8156, + "step": 1970 + }, + { + "epoch": 0.14, + "learning_rate": 4.939717902874751e-05, + "loss": 0.8062, + "step": 1980 + }, + { + "epoch": 0.14, + "learning_rate": 4.9391096372925626e-05, + "loss": 0.7818, + "step": 1990 + }, + { + "epoch": 0.14, + "learning_rate": 4.9384983561283824e-05, + "loss": 0.8105, + "step": 2000 + }, + { + "epoch": 0.14, + "learning_rate": 4.937884060137966e-05, + "loss": 0.8112, + "step": 2010 + }, + { + "epoch": 0.14, + "learning_rate": 4.9372667500807944e-05, + "loss": 0.8102, + "step": 2020 + }, + { + "epoch": 0.14, + "learning_rate": 4.9366464267200755e-05, + "loss": 0.8369, + "step": 2030 + }, + { + "epoch": 0.14, + "learning_rate": 4.936023090822744e-05, + "loss": 0.7841, + "step": 2040 + }, + { + "epoch": 0.15, + "learning_rate": 4.935396743159459e-05, + "loss": 0.8299, + "step": 2050 + }, + { + "epoch": 0.15, + "learning_rate": 4.934767384504602e-05, + "loss": 0.8048, + "step": 2060 + }, + { + "epoch": 0.15, + "learning_rate": 4.934135015636276e-05, + "loss": 0.825, + "step": 2070 + }, + { + "epoch": 0.15, + "learning_rate": 4.93349963733631e-05, + "loss": 0.7928, + "step": 2080 + }, + { + "epoch": 0.15, + "learning_rate": 4.9328612503902496e-05, + "loss": 0.8016, + "step": 2090 + }, + { + "epoch": 0.15, + "learning_rate": 4.932219855587362e-05, + "loss": 0.8134, + "step": 2100 + }, + { + "epoch": 0.15, + "learning_rate": 4.931575453720633e-05, + "loss": 0.8109, + "step": 2110 + }, + { + "epoch": 0.15, + "learning_rate": 4.930928045586765e-05, + "loss": 0.7908, + "step": 2120 + }, + { + "epoch": 0.15, + "learning_rate": 4.9302776319861785e-05, + "loss": 0.7936, + "step": 2130 + }, + { + "epoch": 0.15, + "learning_rate": 4.92962421372301e-05, + "loss": 0.8008, + "step": 2140 + }, + { + "epoch": 0.15, + "learning_rate": 4.928967791605108e-05, + "loss": 0.8237, + "step": 2150 + }, + { + "epoch": 0.15, + "learning_rate": 4.92830836644404e-05, + "loss": 0.8127, + "step": 2160 + }, + { + "epoch": 0.15, + "learning_rate": 4.9276459390550815e-05, + "loss": 0.8168, + "step": 2170 + }, + { + "epoch": 0.15, + "learning_rate": 4.926980510257222e-05, + "loss": 0.805, + "step": 2180 + }, + { + "epoch": 0.16, + "learning_rate": 4.926312080873161e-05, + "loss": 0.8125, + "step": 2190 + }, + { + "epoch": 0.16, + "learning_rate": 4.9256406517293085e-05, + "loss": 0.8267, + "step": 2200 + }, + { + "epoch": 0.16, + "learning_rate": 4.924966223655782e-05, + "loss": 0.8405, + "step": 2210 + }, + { + "epoch": 0.16, + "learning_rate": 4.92428879748641e-05, + "loss": 0.7919, + "step": 2220 + }, + { + "epoch": 0.16, + "learning_rate": 4.923608374058721e-05, + "loss": 0.8398, + "step": 2230 + }, + { + "epoch": 0.16, + "learning_rate": 4.9229249542139576e-05, + "loss": 0.8179, + "step": 2240 + }, + { + "epoch": 0.16, + "learning_rate": 4.9222385387970604e-05, + "loss": 0.8156, + "step": 2250 + }, + { + "epoch": 0.16, + "learning_rate": 4.921549128656677e-05, + "loss": 0.8089, + "step": 2260 + }, + { + "epoch": 0.16, + "learning_rate": 4.920856724645155e-05, + "loss": 0.8244, + "step": 2270 + }, + { + "epoch": 0.16, + "learning_rate": 4.920161327618546e-05, + "loss": 0.8361, + "step": 2280 + }, + { + "epoch": 0.16, + "learning_rate": 4.919462938436602e-05, + "loss": 0.8159, + "step": 2290 + }, + { + "epoch": 0.16, + "learning_rate": 4.918761557962771e-05, + "loss": 0.8104, + "step": 2300 + }, + { + "epoch": 0.16, + "learning_rate": 4.9180571870642034e-05, + "loss": 0.7877, + "step": 2310 + }, + { + "epoch": 0.16, + "learning_rate": 4.917349826611744e-05, + "loss": 0.7967, + "step": 2320 + }, + { + "epoch": 0.16, + "learning_rate": 4.916639477479935e-05, + "loss": 0.7729, + "step": 2330 + }, + { + "epoch": 0.17, + "learning_rate": 4.915926140547013e-05, + "loss": 0.8578, + "step": 2340 + }, + { + "epoch": 0.17, + "learning_rate": 4.915209816694908e-05, + "loss": 0.8219, + "step": 2350 + }, + { + "epoch": 0.17, + "learning_rate": 4.914490506809245e-05, + "loss": 0.8145, + "step": 2360 + }, + { + "epoch": 0.17, + "learning_rate": 4.9137682117793395e-05, + "loss": 0.8132, + "step": 2370 + }, + { + "epoch": 0.17, + "learning_rate": 4.9130429324981963e-05, + "loss": 0.7872, + "step": 2380 + }, + { + "epoch": 0.17, + "learning_rate": 4.9123146698625134e-05, + "loss": 0.8177, + "step": 2390 + }, + { + "epoch": 0.17, + "learning_rate": 4.911583424772672e-05, + "loss": 0.8052, + "step": 2400 + }, + { + "epoch": 0.17, + "learning_rate": 4.910849198132747e-05, + "loss": 0.7646, + "step": 2410 + }, + { + "epoch": 0.17, + "learning_rate": 4.9101119908504935e-05, + "loss": 0.8199, + "step": 2420 + }, + { + "epoch": 0.17, + "learning_rate": 4.909371803837355e-05, + "loss": 0.7819, + "step": 2430 + }, + { + "epoch": 0.17, + "learning_rate": 4.908628638008458e-05, + "loss": 0.7957, + "step": 2440 + }, + { + "epoch": 0.17, + "learning_rate": 4.907882494282614e-05, + "loss": 0.8103, + "step": 2450 + }, + { + "epoch": 0.17, + "learning_rate": 4.907133373582312e-05, + "loss": 0.79, + "step": 2460 + }, + { + "epoch": 0.17, + "learning_rate": 4.9063812768337246e-05, + "loss": 0.8127, + "step": 2470 + }, + { + "epoch": 0.18, + "learning_rate": 4.905626204966705e-05, + "loss": 0.7915, + "step": 2480 + }, + { + "epoch": 0.18, + "learning_rate": 4.90486815891478e-05, + "loss": 0.8207, + "step": 2490 + }, + { + "epoch": 0.18, + "learning_rate": 4.9041071396151585e-05, + "loss": 0.8162, + "step": 2500 + }, + { + "epoch": 0.18, + "learning_rate": 4.903343148008722e-05, + "loss": 0.8055, + "step": 2510 + }, + { + "epoch": 0.18, + "learning_rate": 4.9025761850400283e-05, + "loss": 0.8019, + "step": 2520 + }, + { + "epoch": 0.18, + "learning_rate": 4.9018062516573086e-05, + "loss": 0.801, + "step": 2530 + }, + { + "epoch": 0.18, + "learning_rate": 4.901033348812467e-05, + "loss": 0.7831, + "step": 2540 + }, + { + "epoch": 0.18, + "learning_rate": 4.9002574774610776e-05, + "loss": 0.794, + "step": 2550 + }, + { + "epoch": 0.18, + "learning_rate": 4.899478638562386e-05, + "loss": 0.7902, + "step": 2560 + }, + { + "epoch": 0.18, + "learning_rate": 4.8986968330793054e-05, + "loss": 0.785, + "step": 2570 + }, + { + "epoch": 0.18, + "learning_rate": 4.897912061978418e-05, + "loss": 0.8006, + "step": 2580 + }, + { + "epoch": 0.18, + "learning_rate": 4.897124326229972e-05, + "loss": 0.8208, + "step": 2590 + }, + { + "epoch": 0.18, + "learning_rate": 4.896333626807881e-05, + "loss": 0.7793, + "step": 2600 + }, + { + "epoch": 0.18, + "learning_rate": 4.8955399646897215e-05, + "loss": 0.812, + "step": 2610 + }, + { + "epoch": 0.19, + "learning_rate": 4.894743340856735e-05, + "loss": 0.7948, + "step": 2620 + }, + { + "epoch": 0.19, + "learning_rate": 4.893943756293823e-05, + "loss": 0.7955, + "step": 2630 + }, + { + "epoch": 0.19, + "learning_rate": 4.893141211989549e-05, + "loss": 0.8363, + "step": 2640 + }, + { + "epoch": 0.19, + "learning_rate": 4.892335708936135e-05, + "loss": 0.7986, + "step": 2650 + }, + { + "epoch": 0.19, + "learning_rate": 4.89152724812946e-05, + "loss": 0.8249, + "step": 2660 + }, + { + "epoch": 0.19, + "learning_rate": 4.890715830569062e-05, + "loss": 0.7951, + "step": 2670 + }, + { + "epoch": 0.19, + "learning_rate": 4.889901457258133e-05, + "loss": 0.8098, + "step": 2680 + }, + { + "epoch": 0.19, + "learning_rate": 4.889084129203519e-05, + "loss": 0.7781, + "step": 2690 + }, + { + "epoch": 0.19, + "learning_rate": 4.888263847415721e-05, + "loss": 0.7817, + "step": 2700 + }, + { + "epoch": 0.19, + "learning_rate": 4.887440612908889e-05, + "loss": 0.7848, + "step": 2710 + }, + { + "epoch": 0.19, + "learning_rate": 4.886614426700826e-05, + "loss": 0.7965, + "step": 2720 + }, + { + "epoch": 0.19, + "learning_rate": 4.8857852898129844e-05, + "loss": 0.8067, + "step": 2730 + }, + { + "epoch": 0.19, + "learning_rate": 4.884953203270463e-05, + "loss": 0.7933, + "step": 2740 + }, + { + "epoch": 0.19, + "learning_rate": 4.884118168102008e-05, + "loss": 0.7918, + "step": 2750 + }, + { + "epoch": 0.2, + "learning_rate": 4.883280185340011e-05, + "loss": 0.7758, + "step": 2760 + }, + { + "epoch": 0.2, + "learning_rate": 4.8824392560205085e-05, + "loss": 0.7765, + "step": 2770 + }, + { + "epoch": 0.2, + "learning_rate": 4.88159538118318e-05, + "loss": 0.7848, + "step": 2780 + }, + { + "epoch": 0.2, + "learning_rate": 4.8807485618713463e-05, + "loss": 0.7852, + "step": 2790 + }, + { + "epoch": 0.2, + "learning_rate": 4.8798987991319686e-05, + "loss": 0.8201, + "step": 2800 + }, + { + "epoch": 0.2, + "learning_rate": 4.879046094015646e-05, + "loss": 0.8024, + "step": 2810 + }, + { + "epoch": 0.2, + "learning_rate": 4.8781904475766174e-05, + "loss": 0.7921, + "step": 2820 + }, + { + "epoch": 0.2, + "learning_rate": 4.877331860872758e-05, + "loss": 0.7541, + "step": 2830 + }, + { + "epoch": 0.2, + "learning_rate": 4.876470334965576e-05, + "loss": 0.7689, + "step": 2840 + }, + { + "epoch": 0.2, + "learning_rate": 4.875605870920217e-05, + "loss": 0.8107, + "step": 2850 + }, + { + "epoch": 0.2, + "learning_rate": 4.8747384698054546e-05, + "loss": 0.7784, + "step": 2860 + }, + { + "epoch": 0.2, + "learning_rate": 4.873868132693699e-05, + "loss": 0.7825, + "step": 2870 + }, + { + "epoch": 0.2, + "learning_rate": 4.872994860660985e-05, + "loss": 0.762, + "step": 2880 + }, + { + "epoch": 0.2, + "learning_rate": 4.872118654786979e-05, + "loss": 0.7719, + "step": 2890 + }, + { + "epoch": 0.21, + "learning_rate": 4.871239516154976e-05, + "loss": 0.8455, + "step": 2900 + }, + { + "epoch": 0.21, + "learning_rate": 4.870357445851893e-05, + "loss": 0.7819, + "step": 2910 + }, + { + "epoch": 0.21, + "learning_rate": 4.869472444968274e-05, + "loss": 0.7697, + "step": 2920 + }, + { + "epoch": 0.21, + "learning_rate": 4.8685845145982866e-05, + "loss": 0.7829, + "step": 2930 + }, + { + "epoch": 0.21, + "learning_rate": 4.867693655839719e-05, + "loss": 0.8084, + "step": 2940 + }, + { + "epoch": 0.21, + "learning_rate": 4.866799869793979e-05, + "loss": 0.8239, + "step": 2950 + }, + { + "epoch": 0.21, + "learning_rate": 4.8659031575660966e-05, + "loss": 0.7885, + "step": 2960 + }, + { + "epoch": 0.21, + "learning_rate": 4.865003520264717e-05, + "loss": 0.7958, + "step": 2970 + }, + { + "epoch": 0.21, + "learning_rate": 4.8641009590021035e-05, + "loss": 0.7812, + "step": 2980 + }, + { + "epoch": 0.21, + "learning_rate": 4.8631954748941327e-05, + "loss": 0.8139, + "step": 2990 + }, + { + "epoch": 0.21, + "learning_rate": 4.862287069060296e-05, + "loss": 0.7709, + "step": 3000 + }, + { + "epoch": 0.21, + "learning_rate": 4.861375742623697e-05, + "loss": 0.8124, + "step": 3010 + }, + { + "epoch": 0.21, + "learning_rate": 4.860461496711049e-05, + "loss": 0.8168, + "step": 3020 + }, + { + "epoch": 0.21, + "learning_rate": 4.8595443324526765e-05, + "loss": 0.8055, + "step": 3030 + }, + { + "epoch": 0.22, + "learning_rate": 4.858624250982512e-05, + "loss": 0.7721, + "step": 3040 + }, + { + "epoch": 0.22, + "learning_rate": 4.857701253438093e-05, + "loss": 0.8, + "step": 3050 + }, + { + "epoch": 0.22, + "learning_rate": 4.856775340960563e-05, + "loss": 0.825, + "step": 3060 + }, + { + "epoch": 0.22, + "learning_rate": 4.855846514694671e-05, + "loss": 0.8102, + "step": 3070 + }, + { + "epoch": 0.22, + "learning_rate": 4.854914775788766e-05, + "loss": 0.8078, + "step": 3080 + }, + { + "epoch": 0.22, + "learning_rate": 4.853980125394799e-05, + "loss": 0.7921, + "step": 3090 + }, + { + "epoch": 0.22, + "learning_rate": 4.853042564668321e-05, + "loss": 0.772, + "step": 3100 + }, + { + "epoch": 0.22, + "learning_rate": 4.8521020947684815e-05, + "loss": 0.8153, + "step": 3110 + }, + { + "epoch": 0.22, + "learning_rate": 4.8511587168580254e-05, + "loss": 0.7686, + "step": 3120 + }, + { + "epoch": 0.22, + "learning_rate": 4.850212432103294e-05, + "loss": 0.7748, + "step": 3130 + }, + { + "epoch": 0.22, + "learning_rate": 4.8492632416742214e-05, + "loss": 0.7876, + "step": 3140 + }, + { + "epoch": 0.22, + "learning_rate": 4.848311146744335e-05, + "loss": 0.8033, + "step": 3150 + }, + { + "epoch": 0.22, + "learning_rate": 4.847356148490755e-05, + "loss": 0.7947, + "step": 3160 + }, + { + "epoch": 0.22, + "learning_rate": 4.8463982480941865e-05, + "loss": 0.7956, + "step": 3170 + }, + { + "epoch": 0.23, + "learning_rate": 4.845437446738926e-05, + "loss": 0.8006, + "step": 3180 + }, + { + "epoch": 0.23, + "learning_rate": 4.844473745612857e-05, + "loss": 0.8075, + "step": 3190 + }, + { + "epoch": 0.23, + "learning_rate": 4.8435071459074456e-05, + "loss": 0.795, + "step": 3200 + }, + { + "epoch": 0.23, + "learning_rate": 4.842537648817743e-05, + "loss": 0.7916, + "step": 3210 + }, + { + "epoch": 0.23, + "learning_rate": 4.841565255542384e-05, + "loss": 0.7825, + "step": 3220 + }, + { + "epoch": 0.23, + "learning_rate": 4.84058996728358e-05, + "loss": 0.8057, + "step": 3230 + }, + { + "epoch": 0.23, + "learning_rate": 4.839611785247125e-05, + "loss": 0.7943, + "step": 3240 + }, + { + "epoch": 0.23, + "learning_rate": 4.8386307106423924e-05, + "loss": 0.8024, + "step": 3250 + }, + { + "epoch": 0.23, + "learning_rate": 4.8376467446823266e-05, + "loss": 0.7555, + "step": 3260 + }, + { + "epoch": 0.23, + "learning_rate": 4.8366598885834496e-05, + "loss": 0.7957, + "step": 3270 + }, + { + "epoch": 0.23, + "learning_rate": 4.835670143565857e-05, + "loss": 0.7763, + "step": 3280 + }, + { + "epoch": 0.23, + "learning_rate": 4.834677510853216e-05, + "loss": 0.8111, + "step": 3290 + }, + { + "epoch": 0.23, + "learning_rate": 4.8336819916727624e-05, + "loss": 0.764, + "step": 3300 + }, + { + "epoch": 0.23, + "learning_rate": 4.832683587255302e-05, + "loss": 0.7501, + "step": 3310 + }, + { + "epoch": 0.23, + "learning_rate": 4.831682298835208e-05, + "loss": 0.8185, + "step": 3320 + }, + { + "epoch": 0.24, + "learning_rate": 4.8306781276504186e-05, + "loss": 0.7918, + "step": 3330 + }, + { + "epoch": 0.24, + "learning_rate": 4.8296710749424355e-05, + "loss": 0.8076, + "step": 3340 + }, + { + "epoch": 0.24, + "learning_rate": 4.828661141956325e-05, + "loss": 0.8178, + "step": 3350 + }, + { + "epoch": 0.24, + "learning_rate": 4.8276483299407124e-05, + "loss": 0.8239, + "step": 3360 + }, + { + "epoch": 0.24, + "learning_rate": 4.826632640147783e-05, + "loss": 0.7565, + "step": 3370 + }, + { + "epoch": 0.24, + "learning_rate": 4.82561407383328e-05, + "loss": 0.8099, + "step": 3380 + }, + { + "epoch": 0.24, + "learning_rate": 4.824592632256504e-05, + "loss": 0.7945, + "step": 3390 + }, + { + "epoch": 0.24, + "learning_rate": 4.823568316680309e-05, + "loss": 0.7583, + "step": 3400 + }, + { + "epoch": 0.24, + "learning_rate": 4.822541128371104e-05, + "loss": 0.8081, + "step": 3410 + }, + { + "epoch": 0.24, + "learning_rate": 4.821511068598846e-05, + "loss": 0.7955, + "step": 3420 + }, + { + "epoch": 0.24, + "learning_rate": 4.820478138637048e-05, + "loss": 0.7948, + "step": 3430 + }, + { + "epoch": 0.24, + "learning_rate": 4.8194423397627654e-05, + "loss": 0.7969, + "step": 3440 + }, + { + "epoch": 0.24, + "learning_rate": 4.818403673256604e-05, + "loss": 0.7719, + "step": 3450 + }, + { + "epoch": 0.24, + "learning_rate": 4.817362140402716e-05, + "loss": 0.7689, + "step": 3460 + }, + { + "epoch": 0.25, + "learning_rate": 4.816317742488794e-05, + "loss": 0.7976, + "step": 3470 + }, + { + "epoch": 0.25, + "learning_rate": 4.815270480806075e-05, + "loss": 0.7869, + "step": 3480 + }, + { + "epoch": 0.25, + "learning_rate": 4.814220356649336e-05, + "loss": 0.8099, + "step": 3490 + }, + { + "epoch": 0.25, + "learning_rate": 4.813167371316894e-05, + "loss": 0.8057, + "step": 3500 + }, + { + "epoch": 0.25, + "learning_rate": 4.812111526110602e-05, + "loss": 0.764, + "step": 3510 + }, + { + "epoch": 0.25, + "learning_rate": 4.811052822335849e-05, + "loss": 0.7714, + "step": 3520 + }, + { + "epoch": 0.25, + "learning_rate": 4.8099912613015596e-05, + "loss": 0.8108, + "step": 3530 + }, + { + "epoch": 0.25, + "learning_rate": 4.808926844320189e-05, + "loss": 0.772, + "step": 3540 + }, + { + "epoch": 0.25, + "learning_rate": 4.807859572707725e-05, + "loss": 0.8022, + "step": 3550 + }, + { + "epoch": 0.25, + "learning_rate": 4.806789447783683e-05, + "loss": 0.7885, + "step": 3560 + }, + { + "epoch": 0.25, + "learning_rate": 4.8057164708711064e-05, + "loss": 0.7847, + "step": 3570 + }, + { + "epoch": 0.25, + "learning_rate": 4.804640643296568e-05, + "loss": 0.7756, + "step": 3580 + }, + { + "epoch": 0.25, + "learning_rate": 4.80356196639016e-05, + "loss": 0.7849, + "step": 3590 + }, + { + "epoch": 0.25, + "learning_rate": 4.8024804414855e-05, + "loss": 0.8072, + "step": 3600 + }, + { + "epoch": 0.26, + "learning_rate": 4.801396069919727e-05, + "loss": 0.7894, + "step": 3610 + }, + { + "epoch": 0.26, + "learning_rate": 4.800308853033498e-05, + "loss": 0.8029, + "step": 3620 + }, + { + "epoch": 0.26, + "learning_rate": 4.7992187921709895e-05, + "loss": 0.8059, + "step": 3630 + }, + { + "epoch": 0.26, + "learning_rate": 4.798125888679893e-05, + "loss": 0.7736, + "step": 3640 + }, + { + "epoch": 0.26, + "learning_rate": 4.7970301439114145e-05, + "loss": 0.7819, + "step": 3650 + }, + { + "epoch": 0.26, + "learning_rate": 4.795931559220273e-05, + "loss": 0.8138, + "step": 3660 + }, + { + "epoch": 0.26, + "learning_rate": 4.794830135964698e-05, + "loss": 0.7952, + "step": 3670 + }, + { + "epoch": 0.26, + "learning_rate": 4.79372587550643e-05, + "loss": 0.7933, + "step": 3680 + }, + { + "epoch": 0.26, + "learning_rate": 4.792618779210716e-05, + "loss": 0.7588, + "step": 3690 + }, + { + "epoch": 0.26, + "learning_rate": 4.79150884844631e-05, + "loss": 0.788, + "step": 3700 + }, + { + "epoch": 0.26, + "learning_rate": 4.790396084585469e-05, + "loss": 0.7668, + "step": 3710 + }, + { + "epoch": 0.26, + "learning_rate": 4.7892804890039535e-05, + "loss": 0.7863, + "step": 3720 + }, + { + "epoch": 0.26, + "learning_rate": 4.788162063081025e-05, + "loss": 0.8216, + "step": 3730 + }, + { + "epoch": 0.26, + "learning_rate": 4.787040808199445e-05, + "loss": 0.7619, + "step": 3740 + }, + { + "epoch": 0.27, + "learning_rate": 4.785916725745471e-05, + "loss": 0.7967, + "step": 3750 + }, + { + "epoch": 0.27, + "learning_rate": 4.784789817108858e-05, + "loss": 0.793, + "step": 3760 + }, + { + "epoch": 0.27, + "learning_rate": 4.783660083682853e-05, + "loss": 0.7863, + "step": 3770 + }, + { + "epoch": 0.27, + "learning_rate": 4.7825275268641984e-05, + "loss": 0.7362, + "step": 3780 + }, + { + "epoch": 0.27, + "learning_rate": 4.781392148053124e-05, + "loss": 0.7477, + "step": 3790 + }, + { + "epoch": 0.27, + "learning_rate": 4.780253948653352e-05, + "loss": 0.7581, + "step": 3800 + }, + { + "epoch": 0.27, + "learning_rate": 4.779112930072087e-05, + "loss": 0.7883, + "step": 3810 + }, + { + "epoch": 0.27, + "learning_rate": 4.7779690937200254e-05, + "loss": 0.7659, + "step": 3820 + }, + { + "epoch": 0.27, + "learning_rate": 4.7768224410113424e-05, + "loss": 0.7475, + "step": 3830 + }, + { + "epoch": 0.27, + "learning_rate": 4.7756729733636976e-05, + "loss": 0.7468, + "step": 3840 + }, + { + "epoch": 0.27, + "learning_rate": 4.774520692198228e-05, + "loss": 0.7625, + "step": 3850 + }, + { + "epoch": 0.27, + "learning_rate": 4.7733655989395533e-05, + "loss": 0.7745, + "step": 3860 + }, + { + "epoch": 0.27, + "learning_rate": 4.772207695015767e-05, + "loss": 0.7741, + "step": 3870 + }, + { + "epoch": 0.27, + "learning_rate": 4.771046981858439e-05, + "loss": 0.7774, + "step": 3880 + }, + { + "epoch": 0.28, + "learning_rate": 4.76988346090261e-05, + "loss": 0.7632, + "step": 3890 + }, + { + "epoch": 0.28, + "learning_rate": 4.768717133586795e-05, + "loss": 0.7729, + "step": 3900 + }, + { + "epoch": 0.28, + "learning_rate": 4.767548001352978e-05, + "loss": 0.7626, + "step": 3910 + }, + { + "epoch": 0.28, + "learning_rate": 4.7663760656466085e-05, + "loss": 0.771, + "step": 3920 + }, + { + "epoch": 0.28, + "learning_rate": 4.765201327916605e-05, + "loss": 0.7865, + "step": 3930 + }, + { + "epoch": 0.28, + "learning_rate": 4.764023789615349e-05, + "loss": 0.7758, + "step": 3940 + }, + { + "epoch": 0.28, + "learning_rate": 4.7628434521986845e-05, + "loss": 0.7699, + "step": 3950 + }, + { + "epoch": 0.28, + "learning_rate": 4.761660317125917e-05, + "loss": 0.7967, + "step": 3960 + }, + { + "epoch": 0.28, + "learning_rate": 4.760474385859808e-05, + "loss": 0.767, + "step": 3970 + }, + { + "epoch": 0.28, + "learning_rate": 4.75928565986658e-05, + "loss": 0.8021, + "step": 3980 + }, + { + "epoch": 0.28, + "learning_rate": 4.7580941406159084e-05, + "loss": 0.7811, + "step": 3990 + }, + { + "epoch": 0.28, + "learning_rate": 4.756899829580923e-05, + "loss": 0.773, + "step": 4000 + }, + { + "epoch": 0.28, + "learning_rate": 4.755702728238204e-05, + "loss": 0.7848, + "step": 4010 + }, + { + "epoch": 0.28, + "learning_rate": 4.754502838067782e-05, + "loss": 0.7723, + "step": 4020 + }, + { + "epoch": 0.29, + "learning_rate": 4.753300160553136e-05, + "loss": 0.7581, + "step": 4030 + }, + { + "epoch": 0.29, + "learning_rate": 4.752094697181192e-05, + "loss": 0.8092, + "step": 4040 + }, + { + "epoch": 0.29, + "learning_rate": 4.750886449442318e-05, + "loss": 0.7962, + "step": 4050 + }, + { + "epoch": 0.29, + "learning_rate": 4.749675418830325e-05, + "loss": 0.7947, + "step": 4060 + }, + { + "epoch": 0.29, + "learning_rate": 4.7484616068424656e-05, + "loss": 0.7743, + "step": 4070 + }, + { + "epoch": 0.29, + "learning_rate": 4.7472450149794314e-05, + "loss": 0.7677, + "step": 4080 + }, + { + "epoch": 0.29, + "learning_rate": 4.7460256447453486e-05, + "loss": 0.7854, + "step": 4090 + }, + { + "epoch": 0.29, + "learning_rate": 4.744803497647782e-05, + "loss": 0.7867, + "step": 4100 + }, + { + "epoch": 0.29, + "learning_rate": 4.743578575197726e-05, + "loss": 0.7568, + "step": 4110 + }, + { + "epoch": 0.29, + "learning_rate": 4.742350878909608e-05, + "loss": 0.7739, + "step": 4120 + }, + { + "epoch": 0.29, + "learning_rate": 4.741120410301286e-05, + "loss": 0.8267, + "step": 4130 + }, + { + "epoch": 0.29, + "learning_rate": 4.7398871708940426e-05, + "loss": 0.7795, + "step": 4140 + }, + { + "epoch": 0.29, + "learning_rate": 4.738651162212589e-05, + "loss": 0.7619, + "step": 4150 + }, + { + "epoch": 0.29, + "learning_rate": 4.7374123857850575e-05, + "loss": 0.7704, + "step": 4160 + }, + { + "epoch": 0.3, + "learning_rate": 4.736170843143004e-05, + "loss": 0.7591, + "step": 4170 + }, + { + "epoch": 0.3, + "learning_rate": 4.7349265358214043e-05, + "loss": 0.7845, + "step": 4180 + }, + { + "epoch": 0.3, + "learning_rate": 4.7336794653586534e-05, + "loss": 0.7719, + "step": 4190 + }, + { + "epoch": 0.3, + "learning_rate": 4.732429633296558e-05, + "loss": 0.7608, + "step": 4200 + }, + { + "epoch": 0.3, + "learning_rate": 4.731177041180346e-05, + "loss": 0.758, + "step": 4210 + }, + { + "epoch": 0.3, + "learning_rate": 4.7299216905586505e-05, + "loss": 0.7861, + "step": 4220 + }, + { + "epoch": 0.3, + "learning_rate": 4.72866358298352e-05, + "loss": 0.7758, + "step": 4230 + }, + { + "epoch": 0.3, + "learning_rate": 4.72740272001041e-05, + "loss": 0.7504, + "step": 4240 + }, + { + "epoch": 0.3, + "learning_rate": 4.726139103198183e-05, + "loss": 0.7682, + "step": 4250 + }, + { + "epoch": 0.3, + "learning_rate": 4.724872734109106e-05, + "loss": 0.7687, + "step": 4260 + }, + { + "epoch": 0.3, + "learning_rate": 4.723603614308847e-05, + "loss": 0.7583, + "step": 4270 + }, + { + "epoch": 0.3, + "learning_rate": 4.7223317453664774e-05, + "loss": 0.8159, + "step": 4280 + }, + { + "epoch": 0.3, + "learning_rate": 4.721057128854467e-05, + "loss": 0.7985, + "step": 4290 + }, + { + "epoch": 0.3, + "learning_rate": 4.719779766348682e-05, + "loss": 0.7919, + "step": 4300 + }, + { + "epoch": 0.31, + "learning_rate": 4.7184996594283824e-05, + "loss": 0.7549, + "step": 4310 + }, + { + "epoch": 0.31, + "learning_rate": 4.717216809676224e-05, + "loss": 0.76, + "step": 4320 + }, + { + "epoch": 0.31, + "learning_rate": 4.715931218678251e-05, + "loss": 0.7879, + "step": 4330 + }, + { + "epoch": 0.31, + "learning_rate": 4.714642888023899e-05, + "loss": 0.7934, + "step": 4340 + }, + { + "epoch": 0.31, + "learning_rate": 4.71335181930599e-05, + "loss": 0.7648, + "step": 4350 + }, + { + "epoch": 0.31, + "learning_rate": 4.712058014120729e-05, + "loss": 0.758, + "step": 4360 + }, + { + "epoch": 0.31, + "learning_rate": 4.710761474067707e-05, + "loss": 0.8095, + "step": 4370 + }, + { + "epoch": 0.31, + "learning_rate": 4.709462200749897e-05, + "loss": 0.7676, + "step": 4380 + }, + { + "epoch": 0.31, + "learning_rate": 4.708160195773648e-05, + "loss": 0.7818, + "step": 4390 + }, + { + "epoch": 0.31, + "learning_rate": 4.7068554607486866e-05, + "loss": 0.7766, + "step": 4400 + }, + { + "epoch": 0.31, + "learning_rate": 4.705547997288118e-05, + "loss": 0.7824, + "step": 4410 + }, + { + "epoch": 0.31, + "learning_rate": 4.704237807008418e-05, + "loss": 0.7713, + "step": 4420 + }, + { + "epoch": 0.31, + "learning_rate": 4.702924891529434e-05, + "loss": 0.7972, + "step": 4430 + }, + { + "epoch": 0.31, + "learning_rate": 4.701609252474384e-05, + "loss": 0.766, + "step": 4440 + }, + { + "epoch": 0.31, + "learning_rate": 4.7002908914698505e-05, + "loss": 0.7817, + "step": 4450 + }, + { + "epoch": 0.32, + "learning_rate": 4.698969810145786e-05, + "loss": 0.7626, + "step": 4460 + }, + { + "epoch": 0.32, + "learning_rate": 4.6976460101355004e-05, + "loss": 0.8012, + "step": 4470 + }, + { + "epoch": 0.32, + "learning_rate": 4.696319493075668e-05, + "loss": 0.7746, + "step": 4480 + }, + { + "epoch": 0.32, + "learning_rate": 4.694990260606324e-05, + "loss": 0.8053, + "step": 4490 + }, + { + "epoch": 0.32, + "learning_rate": 4.6936583143708586e-05, + "loss": 0.7903, + "step": 4500 + }, + { + "epoch": 0.32, + "learning_rate": 4.692323656016016e-05, + "loss": 0.7562, + "step": 4510 + }, + { + "epoch": 0.32, + "learning_rate": 4.690986287191895e-05, + "loss": 0.7919, + "step": 4520 + }, + { + "epoch": 0.32, + "learning_rate": 4.689646209551947e-05, + "loss": 0.7616, + "step": 4530 + }, + { + "epoch": 0.32, + "learning_rate": 4.688303424752969e-05, + "loss": 0.7718, + "step": 4540 + }, + { + "epoch": 0.32, + "learning_rate": 4.6869579344551073e-05, + "loss": 0.7858, + "step": 4550 + }, + { + "epoch": 0.32, + "learning_rate": 4.6856097403218534e-05, + "loss": 0.7657, + "step": 4560 + }, + { + "epoch": 0.32, + "learning_rate": 4.6842588440200405e-05, + "loss": 0.7698, + "step": 4570 + }, + { + "epoch": 0.32, + "learning_rate": 4.682905247219843e-05, + "loss": 0.7716, + "step": 4580 + }, + { + "epoch": 0.32, + "learning_rate": 4.681548951594774e-05, + "loss": 0.7889, + "step": 4590 + }, + { + "epoch": 0.33, + "learning_rate": 4.680189958821683e-05, + "loss": 0.8046, + "step": 4600 + }, + { + "epoch": 0.33, + "learning_rate": 4.678828270580756e-05, + "loss": 0.7613, + "step": 4610 + }, + { + "epoch": 0.33, + "learning_rate": 4.677463888555508e-05, + "loss": 0.7745, + "step": 4620 + }, + { + "epoch": 0.33, + "learning_rate": 4.6760968144327876e-05, + "loss": 0.7697, + "step": 4630 + }, + { + "epoch": 0.33, + "learning_rate": 4.674727049902771e-05, + "loss": 0.7795, + "step": 4640 + }, + { + "epoch": 0.33, + "learning_rate": 4.6733545966589587e-05, + "loss": 0.7851, + "step": 4650 + }, + { + "epoch": 0.33, + "learning_rate": 4.671979456398179e-05, + "loss": 0.7905, + "step": 4660 + }, + { + "epoch": 0.33, + "learning_rate": 4.670601630820578e-05, + "loss": 0.7617, + "step": 4670 + }, + { + "epoch": 0.33, + "learning_rate": 4.6692211216296257e-05, + "loss": 0.7769, + "step": 4680 + }, + { + "epoch": 0.33, + "learning_rate": 4.667837930532108e-05, + "loss": 0.7952, + "step": 4690 + }, + { + "epoch": 0.33, + "learning_rate": 4.666452059238127e-05, + "loss": 0.803, + "step": 4700 + }, + { + "epoch": 0.33, + "learning_rate": 4.665063509461097e-05, + "loss": 0.7749, + "step": 4710 + }, + { + "epoch": 0.33, + "learning_rate": 4.6636722829177466e-05, + "loss": 0.7641, + "step": 4720 + }, + { + "epoch": 0.33, + "learning_rate": 4.6622783813281114e-05, + "loss": 0.7548, + "step": 4730 + }, + { + "epoch": 0.34, + "learning_rate": 4.6608818064155356e-05, + "loss": 0.7696, + "step": 4740 + }, + { + "epoch": 0.34, + "learning_rate": 4.659482559906669e-05, + "loss": 0.8007, + "step": 4750 + }, + { + "epoch": 0.34, + "learning_rate": 4.658080643531462e-05, + "loss": 0.7548, + "step": 4760 + }, + { + "epoch": 0.34, + "learning_rate": 4.656676059023169e-05, + "loss": 0.7572, + "step": 4770 + }, + { + "epoch": 0.34, + "learning_rate": 4.6552688081183405e-05, + "loss": 0.7546, + "step": 4780 + }, + { + "epoch": 0.34, + "learning_rate": 4.653858892556825e-05, + "loss": 0.771, + "step": 4790 + }, + { + "epoch": 0.34, + "learning_rate": 4.652446314081765e-05, + "loss": 0.7633, + "step": 4800 + }, + { + "epoch": 0.34, + "learning_rate": 4.651031074439596e-05, + "loss": 0.7614, + "step": 4810 + }, + { + "epoch": 0.34, + "learning_rate": 4.649613175380043e-05, + "loss": 0.7694, + "step": 4820 + }, + { + "epoch": 0.34, + "learning_rate": 4.648192618656118e-05, + "loss": 0.7628, + "step": 4830 + }, + { + "epoch": 0.34, + "learning_rate": 4.6467694060241206e-05, + "loss": 0.7782, + "step": 4840 + }, + { + "epoch": 0.34, + "learning_rate": 4.645343539243633e-05, + "loss": 0.7816, + "step": 4850 + }, + { + "epoch": 0.34, + "learning_rate": 4.643915020077519e-05, + "loss": 0.7886, + "step": 4860 + }, + { + "epoch": 0.34, + "learning_rate": 4.642483850291922e-05, + "loss": 0.7335, + "step": 4870 + }, + { + "epoch": 0.35, + "learning_rate": 4.641050031656262e-05, + "loss": 0.7666, + "step": 4880 + }, + { + "epoch": 0.35, + "learning_rate": 4.639613565943233e-05, + "loss": 0.7764, + "step": 4890 + }, + { + "epoch": 0.35, + "learning_rate": 4.638174454928805e-05, + "loss": 0.7386, + "step": 4900 + }, + { + "epoch": 0.35, + "learning_rate": 4.636732700392215e-05, + "loss": 0.7629, + "step": 4910 + }, + { + "epoch": 0.35, + "learning_rate": 4.635288304115969e-05, + "loss": 0.7725, + "step": 4920 + }, + { + "epoch": 0.35, + "learning_rate": 4.633841267885841e-05, + "loss": 0.7857, + "step": 4930 + }, + { + "epoch": 0.35, + "learning_rate": 4.6323915934908665e-05, + "loss": 0.7632, + "step": 4940 + }, + { + "epoch": 0.35, + "learning_rate": 4.630939282723344e-05, + "loss": 0.7667, + "step": 4950 + }, + { + "epoch": 0.35, + "learning_rate": 4.629484337378832e-05, + "loss": 0.7853, + "step": 4960 + }, + { + "epoch": 0.35, + "learning_rate": 4.628026759256145e-05, + "loss": 0.7849, + "step": 4970 + }, + { + "epoch": 0.35, + "learning_rate": 4.626566550157353e-05, + "loss": 0.7754, + "step": 4980 + }, + { + "epoch": 0.35, + "learning_rate": 4.6251037118877784e-05, + "loss": 0.7892, + "step": 4990 + }, + { + "epoch": 0.35, + "learning_rate": 4.623638246255996e-05, + "loss": 0.7652, + "step": 5000 + }, + { + "epoch": 0.35, + "learning_rate": 4.622170155073825e-05, + "loss": 0.7959, + "step": 5010 + }, + { + "epoch": 0.36, + "learning_rate": 4.6206994401563355e-05, + "loss": 0.7871, + "step": 5020 + }, + { + "epoch": 0.36, + "learning_rate": 4.6192261033218384e-05, + "loss": 0.7697, + "step": 5030 + }, + { + "epoch": 0.36, + "learning_rate": 4.617750146391887e-05, + "loss": 0.7742, + "step": 5040 + }, + { + "epoch": 0.36, + "learning_rate": 4.616271571191273e-05, + "loss": 0.775, + "step": 5050 + }, + { + "epoch": 0.36, + "learning_rate": 4.614790379548027e-05, + "loss": 0.745, + "step": 5060 + }, + { + "epoch": 0.36, + "learning_rate": 4.613306573293413e-05, + "loss": 0.7829, + "step": 5070 + }, + { + "epoch": 0.36, + "learning_rate": 4.6118201542619285e-05, + "loss": 0.7785, + "step": 5080 + }, + { + "epoch": 0.36, + "learning_rate": 4.6103311242913016e-05, + "loss": 0.8053, + "step": 5090 + }, + { + "epoch": 0.36, + "learning_rate": 4.608839485222486e-05, + "loss": 0.7801, + "step": 5100 + }, + { + "epoch": 0.36, + "learning_rate": 4.607345238899663e-05, + "loss": 0.8004, + "step": 5110 + }, + { + "epoch": 0.36, + "learning_rate": 4.605848387170238e-05, + "loss": 0.7903, + "step": 5120 + }, + { + "epoch": 0.36, + "learning_rate": 4.6043489318848365e-05, + "loss": 0.7794, + "step": 5130 + }, + { + "epoch": 0.36, + "learning_rate": 4.602846874897303e-05, + "loss": 0.7509, + "step": 5140 + }, + { + "epoch": 0.36, + "learning_rate": 4.6013422180646983e-05, + "loss": 0.7748, + "step": 5150 + }, + { + "epoch": 0.37, + "learning_rate": 4.5998349632472994e-05, + "loss": 0.762, + "step": 5160 + }, + { + "epoch": 0.37, + "learning_rate": 4.5983251123085925e-05, + "loss": 0.7515, + "step": 5170 + }, + { + "epoch": 0.37, + "learning_rate": 4.596812667115275e-05, + "loss": 0.7714, + "step": 5180 + }, + { + "epoch": 0.37, + "learning_rate": 4.595297629537252e-05, + "loss": 0.7723, + "step": 5190 + }, + { + "epoch": 0.37, + "learning_rate": 4.5937800014476334e-05, + "loss": 0.7754, + "step": 5200 + }, + { + "epoch": 0.37, + "learning_rate": 4.5922597847227316e-05, + "loss": 0.7633, + "step": 5210 + }, + { + "epoch": 0.37, + "learning_rate": 4.5907369812420595e-05, + "loss": 0.7812, + "step": 5220 + }, + { + "epoch": 0.37, + "learning_rate": 4.5892115928883274e-05, + "loss": 0.7358, + "step": 5230 + }, + { + "epoch": 0.37, + "learning_rate": 4.5876836215474434e-05, + "loss": 0.7895, + "step": 5240 + }, + { + "epoch": 0.37, + "learning_rate": 4.586153069108507e-05, + "loss": 0.7751, + "step": 5250 + }, + { + "epoch": 0.37, + "learning_rate": 4.58461993746381e-05, + "loss": 0.7407, + "step": 5260 + }, + { + "epoch": 0.37, + "learning_rate": 4.583084228508833e-05, + "loss": 0.7787, + "step": 5270 + }, + { + "epoch": 0.37, + "learning_rate": 4.581545944142243e-05, + "loss": 0.7861, + "step": 5280 + }, + { + "epoch": 0.37, + "learning_rate": 4.580005086265888e-05, + "loss": 0.7661, + "step": 5290 + }, + { + "epoch": 0.38, + "learning_rate": 4.578461656784805e-05, + "loss": 0.7507, + "step": 5300 + }, + { + "epoch": 0.38, + "learning_rate": 4.576915657607202e-05, + "loss": 0.7674, + "step": 5310 + }, + { + "epoch": 0.38, + "learning_rate": 4.575367090644471e-05, + "loss": 0.7532, + "step": 5320 + }, + { + "epoch": 0.38, + "learning_rate": 4.573815957811174e-05, + "loss": 0.7624, + "step": 5330 + }, + { + "epoch": 0.38, + "learning_rate": 4.5722622610250466e-05, + "loss": 0.8019, + "step": 5340 + }, + { + "epoch": 0.38, + "learning_rate": 4.570706002206996e-05, + "loss": 0.7635, + "step": 5350 + }, + { + "epoch": 0.38, + "learning_rate": 4.569147183281095e-05, + "loss": 0.762, + "step": 5360 + }, + { + "epoch": 0.38, + "learning_rate": 4.5675858061745814e-05, + "loss": 0.756, + "step": 5370 + }, + { + "epoch": 0.38, + "learning_rate": 4.566021872817858e-05, + "loss": 0.7495, + "step": 5380 + }, + { + "epoch": 0.38, + "learning_rate": 4.564455385144486e-05, + "loss": 0.761, + "step": 5390 + }, + { + "epoch": 0.38, + "learning_rate": 4.562886345091185e-05, + "loss": 0.753, + "step": 5400 + }, + { + "epoch": 0.38, + "learning_rate": 4.561314754597831e-05, + "loss": 0.76, + "step": 5410 + }, + { + "epoch": 0.38, + "learning_rate": 4.559740615607453e-05, + "loss": 0.7307, + "step": 5420 + }, + { + "epoch": 0.38, + "learning_rate": 4.558163930066229e-05, + "loss": 0.7455, + "step": 5430 + }, + { + "epoch": 0.39, + "learning_rate": 4.556584699923488e-05, + "loss": 0.7863, + "step": 5440 + }, + { + "epoch": 0.39, + "learning_rate": 4.555002927131704e-05, + "loss": 0.7518, + "step": 5450 + }, + { + "epoch": 0.39, + "learning_rate": 4.553418613646494e-05, + "loss": 0.735, + "step": 5460 + }, + { + "epoch": 0.39, + "learning_rate": 4.551831761426617e-05, + "loss": 0.7715, + "step": 5470 + }, + { + "epoch": 0.39, + "learning_rate": 4.5502423724339706e-05, + "loss": 0.7423, + "step": 5480 + }, + { + "epoch": 0.39, + "learning_rate": 4.5486504486335876e-05, + "loss": 0.7504, + "step": 5490 + }, + { + "epoch": 0.39, + "learning_rate": 4.547055991993638e-05, + "loss": 0.7598, + "step": 5500 + }, + { + "epoch": 0.39, + "learning_rate": 4.5454590044854185e-05, + "loss": 0.7517, + "step": 5510 + }, + { + "epoch": 0.39, + "learning_rate": 4.5438594880833586e-05, + "loss": 0.7533, + "step": 5520 + }, + { + "epoch": 0.39, + "learning_rate": 4.5422574447650126e-05, + "loss": 0.7872, + "step": 5530 + }, + { + "epoch": 0.39, + "learning_rate": 4.540652876511059e-05, + "loss": 0.7777, + "step": 5540 + }, + { + "epoch": 0.39, + "learning_rate": 4.5390457853052994e-05, + "loss": 0.7838, + "step": 5550 + }, + { + "epoch": 0.39, + "learning_rate": 4.5374361731346526e-05, + "loss": 0.7678, + "step": 5560 + }, + { + "epoch": 0.39, + "learning_rate": 4.535824041989156e-05, + "loss": 0.7444, + "step": 5570 + }, + { + "epoch": 0.39, + "learning_rate": 4.534209393861959e-05, + "loss": 0.7691, + "step": 5580 + }, + { + "epoch": 0.4, + "learning_rate": 4.5325922307493274e-05, + "loss": 0.7975, + "step": 5590 + }, + { + "epoch": 0.4, + "learning_rate": 4.530972554650631e-05, + "loss": 0.7718, + "step": 5600 + }, + { + "epoch": 0.4, + "learning_rate": 4.529350367568349e-05, + "loss": 0.7626, + "step": 5610 + }, + { + "epoch": 0.4, + "learning_rate": 4.527725671508066e-05, + "loss": 0.7574, + "step": 5620 + }, + { + "epoch": 0.4, + "learning_rate": 4.5260984684784656e-05, + "loss": 0.7403, + "step": 5630 + }, + { + "epoch": 0.4, + "learning_rate": 4.524468760491336e-05, + "loss": 0.7511, + "step": 5640 + }, + { + "epoch": 0.4, + "learning_rate": 4.522836549561556e-05, + "loss": 0.7649, + "step": 5650 + }, + { + "epoch": 0.4, + "learning_rate": 4.5212018377071044e-05, + "loss": 0.7782, + "step": 5660 + }, + { + "epoch": 0.4, + "learning_rate": 4.5195646269490475e-05, + "loss": 0.784, + "step": 5670 + }, + { + "epoch": 0.4, + "learning_rate": 4.517924919311545e-05, + "loss": 0.7662, + "step": 5680 + }, + { + "epoch": 0.4, + "learning_rate": 4.5162827168218413e-05, + "loss": 0.761, + "step": 5690 + }, + { + "epoch": 0.4, + "learning_rate": 4.5146380215102666e-05, + "loss": 0.7609, + "step": 5700 + }, + { + "epoch": 0.4, + "learning_rate": 4.512990835410231e-05, + "loss": 0.7946, + "step": 5710 + }, + { + "epoch": 0.4, + "learning_rate": 4.5113411605582266e-05, + "loss": 0.7226, + "step": 5720 + }, + { + "epoch": 0.41, + "learning_rate": 4.509688998993821e-05, + "loss": 0.7565, + "step": 5730 + }, + { + "epoch": 0.41, + "learning_rate": 4.5080343527596555e-05, + "loss": 0.776, + "step": 5740 + }, + { + "epoch": 0.41, + "learning_rate": 4.506377223901447e-05, + "loss": 0.779, + "step": 5750 + }, + { + "epoch": 0.41, + "learning_rate": 4.504717614467977e-05, + "loss": 0.7387, + "step": 5760 + }, + { + "epoch": 0.41, + "learning_rate": 4.5030555265110964e-05, + "loss": 0.7812, + "step": 5770 + }, + { + "epoch": 0.41, + "learning_rate": 4.50139096208572e-05, + "loss": 0.7568, + "step": 5780 + }, + { + "epoch": 0.41, + "learning_rate": 4.499723923249824e-05, + "loss": 0.7773, + "step": 5790 + }, + { + "epoch": 0.41, + "learning_rate": 4.4980544120644456e-05, + "loss": 0.7523, + "step": 5800 + }, + { + "epoch": 0.41, + "learning_rate": 4.4963824305936764e-05, + "loss": 0.748, + "step": 5810 + }, + { + "epoch": 0.41, + "learning_rate": 4.494707980904662e-05, + "loss": 0.7493, + "step": 5820 + }, + { + "epoch": 0.41, + "learning_rate": 4.4930310650676026e-05, + "loss": 0.7691, + "step": 5830 + }, + { + "epoch": 0.41, + "learning_rate": 4.491351685155744e-05, + "loss": 0.7611, + "step": 5840 + }, + { + "epoch": 0.41, + "learning_rate": 4.4896698432453804e-05, + "loss": 0.7332, + "step": 5850 + }, + { + "epoch": 0.41, + "learning_rate": 4.487985541415849e-05, + "loss": 0.7486, + "step": 5860 + }, + { + "epoch": 0.42, + "learning_rate": 4.486298781749528e-05, + "loss": 0.7807, + "step": 5870 + }, + { + "epoch": 0.42, + "learning_rate": 4.484609566331837e-05, + "loss": 0.7707, + "step": 5880 + }, + { + "epoch": 0.42, + "learning_rate": 4.482917897251227e-05, + "loss": 0.7831, + "step": 5890 + }, + { + "epoch": 0.42, + "learning_rate": 4.481223776599188e-05, + "loss": 0.7667, + "step": 5900 + }, + { + "epoch": 0.42, + "learning_rate": 4.479527206470238e-05, + "loss": 0.7681, + "step": 5910 + }, + { + "epoch": 0.42, + "learning_rate": 4.47782818896192e-05, + "loss": 0.7836, + "step": 5920 + }, + { + "epoch": 0.42, + "learning_rate": 4.4761267261748106e-05, + "loss": 0.7464, + "step": 5930 + }, + { + "epoch": 0.42, + "learning_rate": 4.474422820212504e-05, + "loss": 0.7858, + "step": 5940 + }, + { + "epoch": 0.42, + "learning_rate": 4.472716473181617e-05, + "loss": 0.7458, + "step": 5950 + }, + { + "epoch": 0.42, + "learning_rate": 4.4710076871917825e-05, + "loss": 0.7579, + "step": 5960 + }, + { + "epoch": 0.42, + "learning_rate": 4.4692964643556526e-05, + "loss": 0.7861, + "step": 5970 + }, + { + "epoch": 0.42, + "learning_rate": 4.467582806788887e-05, + "loss": 0.7688, + "step": 5980 + }, + { + "epoch": 0.42, + "learning_rate": 4.4658667166101605e-05, + "loss": 0.7387, + "step": 5990 + }, + { + "epoch": 0.42, + "learning_rate": 4.464148195941152e-05, + "loss": 0.7929, + "step": 6000 + }, + { + "epoch": 0.43, + "learning_rate": 4.462427246906548e-05, + "loss": 0.7441, + "step": 6010 + }, + { + "epoch": 0.43, + "learning_rate": 4.460703871634035e-05, + "loss": 0.746, + "step": 6020 + }, + { + "epoch": 0.43, + "learning_rate": 4.4589780722542994e-05, + "loss": 0.7437, + "step": 6030 + }, + { + "epoch": 0.43, + "learning_rate": 4.4572498509010275e-05, + "loss": 0.7837, + "step": 6040 + }, + { + "epoch": 0.43, + "learning_rate": 4.4555192097108954e-05, + "loss": 0.7534, + "step": 6050 + }, + { + "epoch": 0.43, + "learning_rate": 4.4537861508235746e-05, + "loss": 0.7585, + "step": 6060 + }, + { + "epoch": 0.43, + "learning_rate": 4.452050676381725e-05, + "loss": 0.7431, + "step": 6070 + }, + { + "epoch": 0.43, + "learning_rate": 4.450312788530991e-05, + "loss": 0.769, + "step": 6080 + }, + { + "epoch": 0.43, + "learning_rate": 4.448572489420003e-05, + "loss": 0.7781, + "step": 6090 + }, + { + "epoch": 0.43, + "learning_rate": 4.4468297812003724e-05, + "loss": 0.7682, + "step": 6100 + }, + { + "epoch": 0.43, + "learning_rate": 4.445084666026688e-05, + "loss": 0.8062, + "step": 6110 + }, + { + "epoch": 0.43, + "learning_rate": 4.443337146056515e-05, + "loss": 0.7512, + "step": 6120 + }, + { + "epoch": 0.43, + "learning_rate": 4.441587223450391e-05, + "loss": 0.7637, + "step": 6130 + }, + { + "epoch": 0.43, + "learning_rate": 4.4398349003718257e-05, + "loss": 0.7575, + "step": 6140 + }, + { + "epoch": 0.44, + "learning_rate": 4.438080178987296e-05, + "loss": 0.7549, + "step": 6150 + }, + { + "epoch": 0.44, + "learning_rate": 4.436323061466242e-05, + "loss": 0.7705, + "step": 6160 + }, + { + "epoch": 0.44, + "learning_rate": 4.434739608795997e-05, + "loss": 0.7726, + "step": 6170 + }, + { + "epoch": 0.44, + "learning_rate": 4.432977944602969e-05, + "loss": 0.7431, + "step": 6180 + }, + { + "epoch": 0.44, + "learning_rate": 4.431390403463827e-05, + "loss": 0.7338, + "step": 6190 + }, + { + "epoch": 0.44, + "learning_rate": 4.429624200461494e-05, + "loss": 0.7498, + "step": 6200 + }, + { + "epoch": 0.44, + "learning_rate": 4.4278556117771474e-05, + "loss": 0.7325, + "step": 6210 + }, + { + "epoch": 0.44, + "learning_rate": 4.4260846395973755e-05, + "loss": 0.7703, + "step": 6220 + }, + { + "epoch": 0.44, + "learning_rate": 4.424311286111709e-05, + "loss": 0.7717, + "step": 6230 + }, + { + "epoch": 0.44, + "learning_rate": 4.422535553512627e-05, + "loss": 0.7324, + "step": 6240 + }, + { + "epoch": 0.44, + "learning_rate": 4.420757443995548e-05, + "loss": 0.7564, + "step": 6250 + }, + { + "epoch": 0.44, + "learning_rate": 4.4189769597588294e-05, + "loss": 0.7186, + "step": 6260 + }, + { + "epoch": 0.44, + "learning_rate": 4.417194103003765e-05, + "loss": 0.7419, + "step": 6270 + }, + { + "epoch": 0.44, + "learning_rate": 4.4154088759345805e-05, + "loss": 0.7456, + "step": 6280 + }, + { + "epoch": 0.45, + "learning_rate": 4.4136212807584345e-05, + "loss": 0.7672, + "step": 6290 + }, + { + "epoch": 0.45, + "learning_rate": 4.411831319685412e-05, + "loss": 0.7548, + "step": 6300 + }, + { + "epoch": 0.45, + "learning_rate": 4.410038994928522e-05, + "loss": 0.7847, + "step": 6310 + }, + { + "epoch": 0.45, + "learning_rate": 4.408244308703699e-05, + "loss": 0.7269, + "step": 6320 + }, + { + "epoch": 0.45, + "learning_rate": 4.406447263229792e-05, + "loss": 0.7509, + "step": 6330 + }, + { + "epoch": 0.45, + "learning_rate": 4.4046478607285725e-05, + "loss": 0.749, + "step": 6340 + }, + { + "epoch": 0.45, + "learning_rate": 4.402846103424722e-05, + "loss": 0.74, + "step": 6350 + }, + { + "epoch": 0.45, + "learning_rate": 4.401041993545837e-05, + "loss": 0.7405, + "step": 6360 + }, + { + "epoch": 0.45, + "learning_rate": 4.399235533322419e-05, + "loss": 0.7815, + "step": 6370 + }, + { + "epoch": 0.45, + "learning_rate": 4.397426724987876e-05, + "loss": 0.7583, + "step": 6380 + }, + { + "epoch": 0.45, + "learning_rate": 4.3956155707785204e-05, + "loss": 0.7438, + "step": 6390 + }, + { + "epoch": 0.45, + "learning_rate": 4.393802072933566e-05, + "loss": 0.7448, + "step": 6400 + }, + { + "epoch": 0.45, + "learning_rate": 4.39198623369512e-05, + "loss": 0.7583, + "step": 6410 + }, + { + "epoch": 0.45, + "learning_rate": 4.390168055308189e-05, + "loss": 0.7528, + "step": 6420 + }, + { + "epoch": 0.46, + "learning_rate": 4.388347540020669e-05, + "loss": 0.7568, + "step": 6430 + }, + { + "epoch": 0.46, + "learning_rate": 4.386524690083343e-05, + "loss": 0.7638, + "step": 6440 + }, + { + "epoch": 0.46, + "learning_rate": 4.3846995077498875e-05, + "loss": 0.7391, + "step": 6450 + }, + { + "epoch": 0.46, + "learning_rate": 4.382871995276856e-05, + "loss": 0.7421, + "step": 6460 + }, + { + "epoch": 0.46, + "learning_rate": 4.3810421549236845e-05, + "loss": 0.7869, + "step": 6470 + }, + { + "epoch": 0.46, + "learning_rate": 4.37920998895269e-05, + "loss": 0.7767, + "step": 6480 + }, + { + "epoch": 0.46, + "learning_rate": 4.37737549962906e-05, + "loss": 0.7687, + "step": 6490 + }, + { + "epoch": 0.46, + "learning_rate": 4.375538689220858e-05, + "loss": 0.7374, + "step": 6500 + }, + { + "epoch": 0.46, + "learning_rate": 4.373699559999017e-05, + "loss": 0.7617, + "step": 6510 + }, + { + "epoch": 0.46, + "learning_rate": 4.371858114237335e-05, + "loss": 0.7686, + "step": 6520 + }, + { + "epoch": 0.46, + "learning_rate": 4.3700143542124745e-05, + "loss": 0.739, + "step": 6530 + }, + { + "epoch": 0.46, + "learning_rate": 4.36816828220396e-05, + "loss": 0.7728, + "step": 6540 + }, + { + "epoch": 0.46, + "learning_rate": 4.3663199004941756e-05, + "loss": 0.7622, + "step": 6550 + }, + { + "epoch": 0.46, + "learning_rate": 4.364469211368358e-05, + "loss": 0.7655, + "step": 6560 + }, + { + "epoch": 0.47, + "learning_rate": 4.362616217114599e-05, + "loss": 0.7227, + "step": 6570 + }, + { + "epoch": 0.47, + "learning_rate": 4.360760920023839e-05, + "loss": 0.7899, + "step": 6580 + }, + { + "epoch": 0.47, + "learning_rate": 4.3589033223898654e-05, + "loss": 0.7411, + "step": 6590 + }, + { + "epoch": 0.47, + "learning_rate": 4.357043426509312e-05, + "loss": 0.7544, + "step": 6600 + }, + { + "epoch": 0.47, + "learning_rate": 4.3551812346816514e-05, + "loss": 0.7661, + "step": 6610 + }, + { + "epoch": 0.47, + "learning_rate": 4.3533167492091965e-05, + "loss": 0.7741, + "step": 6620 + }, + { + "epoch": 0.47, + "learning_rate": 4.351449972397095e-05, + "loss": 0.7939, + "step": 6630 + }, + { + "epoch": 0.47, + "learning_rate": 4.3495809065533275e-05, + "loss": 0.7487, + "step": 6640 + }, + { + "epoch": 0.47, + "learning_rate": 4.347709553988707e-05, + "loss": 0.7369, + "step": 6650 + }, + { + "epoch": 0.47, + "learning_rate": 4.345835917016869e-05, + "loss": 0.74, + "step": 6660 + }, + { + "epoch": 0.47, + "learning_rate": 4.3439599979542775e-05, + "loss": 0.7471, + "step": 6670 + }, + { + "epoch": 0.47, + "learning_rate": 4.342081799120216e-05, + "loss": 0.7852, + "step": 6680 + }, + { + "epoch": 0.47, + "learning_rate": 4.3402013228367866e-05, + "loss": 0.7979, + "step": 6690 + }, + { + "epoch": 0.47, + "learning_rate": 4.3383185714289075e-05, + "loss": 0.766, + "step": 6700 + }, + { + "epoch": 0.47, + "learning_rate": 4.336433547224311e-05, + "loss": 0.7547, + "step": 6710 + }, + { + "epoch": 0.48, + "learning_rate": 4.334546252553537e-05, + "loss": 0.7385, + "step": 6720 + }, + { + "epoch": 0.48, + "learning_rate": 4.332656689749933e-05, + "loss": 0.7328, + "step": 6730 + }, + { + "epoch": 0.48, + "learning_rate": 4.3307648611496534e-05, + "loss": 0.8058, + "step": 6740 + }, + { + "epoch": 0.48, + "learning_rate": 4.32887076909165e-05, + "loss": 0.7683, + "step": 6750 + }, + { + "epoch": 0.48, + "learning_rate": 4.326974415917675e-05, + "loss": 0.772, + "step": 6760 + }, + { + "epoch": 0.48, + "learning_rate": 4.325075803972277e-05, + "loss": 0.769, + "step": 6770 + }, + { + "epoch": 0.48, + "learning_rate": 4.3231749356027953e-05, + "loss": 0.7472, + "step": 6780 + }, + { + "epoch": 0.48, + "learning_rate": 4.32127181315936e-05, + "loss": 0.7345, + "step": 6790 + }, + { + "epoch": 0.48, + "learning_rate": 4.319366438994887e-05, + "loss": 0.753, + "step": 6800 + }, + { + "epoch": 0.48, + "learning_rate": 4.3174588154650786e-05, + "loss": 0.7583, + "step": 6810 + }, + { + "epoch": 0.48, + "learning_rate": 4.3155489449284145e-05, + "loss": 0.758, + "step": 6820 + }, + { + "epoch": 0.48, + "learning_rate": 4.313636829746155e-05, + "loss": 0.7883, + "step": 6830 + }, + { + "epoch": 0.48, + "learning_rate": 4.311722472282336e-05, + "loss": 0.7471, + "step": 6840 + }, + { + "epoch": 0.48, + "learning_rate": 4.309805874903764e-05, + "loss": 0.7488, + "step": 6850 + }, + { + "epoch": 0.49, + "learning_rate": 4.307887039980014e-05, + "loss": 0.7445, + "step": 6860 + }, + { + "epoch": 0.49, + "learning_rate": 4.30596596988343e-05, + "loss": 0.7558, + "step": 6870 + }, + { + "epoch": 0.49, + "learning_rate": 4.3040426669891185e-05, + "loss": 0.7653, + "step": 6880 + }, + { + "epoch": 0.49, + "learning_rate": 4.3021171336749456e-05, + "loss": 0.7492, + "step": 6890 + }, + { + "epoch": 0.49, + "learning_rate": 4.3001893723215345e-05, + "loss": 0.7834, + "step": 6900 + }, + { + "epoch": 0.49, + "learning_rate": 4.2982593853122665e-05, + "loss": 0.7641, + "step": 6910 + }, + { + "epoch": 0.49, + "learning_rate": 4.2963271750332715e-05, + "loss": 0.7951, + "step": 6920 + }, + { + "epoch": 0.49, + "learning_rate": 4.294392743873427e-05, + "loss": 0.7493, + "step": 6930 + }, + { + "epoch": 0.49, + "learning_rate": 4.2924560942243594e-05, + "loss": 0.7314, + "step": 6940 + }, + { + "epoch": 0.49, + "learning_rate": 4.2905172284804366e-05, + "loss": 0.7427, + "step": 6950 + }, + { + "epoch": 0.49, + "learning_rate": 4.288576149038767e-05, + "loss": 0.7733, + "step": 6960 + }, + { + "epoch": 0.49, + "learning_rate": 4.286632858299193e-05, + "loss": 0.717, + "step": 6970 + }, + { + "epoch": 0.49, + "learning_rate": 4.284687358664296e-05, + "loss": 0.7715, + "step": 6980 + }, + { + "epoch": 0.49, + "learning_rate": 4.2827396525393834e-05, + "loss": 0.7389, + "step": 6990 + }, + { + "epoch": 0.5, + "learning_rate": 4.280789742332494e-05, + "loss": 0.7324, + "step": 7000 + }, + { + "epoch": 0.5, + "learning_rate": 4.27883763045439e-05, + "loss": 0.7295, + "step": 7010 + }, + { + "epoch": 0.5, + "learning_rate": 4.2768833193185555e-05, + "loss": 0.7567, + "step": 7020 + }, + { + "epoch": 0.5, + "learning_rate": 4.2749268113411945e-05, + "loss": 0.7474, + "step": 7030 + }, + { + "epoch": 0.5, + "learning_rate": 4.272968108941226e-05, + "loss": 0.7627, + "step": 7040 + }, + { + "epoch": 0.5, + "learning_rate": 4.2710072145402834e-05, + "loss": 0.7624, + "step": 7050 + }, + { + "epoch": 0.5, + "learning_rate": 4.269044130562709e-05, + "loss": 0.7408, + "step": 7060 + }, + { + "epoch": 0.5, + "learning_rate": 4.267078859435554e-05, + "loss": 0.7312, + "step": 7070 + }, + { + "epoch": 0.5, + "learning_rate": 4.265111403588571e-05, + "loss": 0.728, + "step": 7080 + }, + { + "epoch": 0.5, + "learning_rate": 4.263141765454215e-05, + "loss": 0.7289, + "step": 7090 + }, + { + "epoch": 0.5, + "learning_rate": 4.261169947467639e-05, + "loss": 0.7292, + "step": 7100 + }, + { + "epoch": 0.5, + "learning_rate": 4.259195952066693e-05, + "loss": 0.745, + "step": 7110 + }, + { + "epoch": 0.5, + "learning_rate": 4.257219781691914e-05, + "loss": 0.7376, + "step": 7120 + }, + { + "epoch": 0.5, + "learning_rate": 4.255241438786533e-05, + "loss": 0.7655, + "step": 7130 + }, + { + "epoch": 0.51, + "learning_rate": 4.253260925796465e-05, + "loss": 0.7414, + "step": 7140 + }, + { + "epoch": 0.51, + "learning_rate": 4.251278245170308e-05, + "loss": 0.7371, + "step": 7150 + }, + { + "epoch": 0.51, + "learning_rate": 4.249293399359341e-05, + "loss": 0.7798, + "step": 7160 + }, + { + "epoch": 0.51, + "learning_rate": 4.247306390817518e-05, + "loss": 0.7531, + "step": 7170 + }, + { + "epoch": 0.51, + "learning_rate": 4.245317222001467e-05, + "loss": 0.7621, + "step": 7180 + }, + { + "epoch": 0.51, + "learning_rate": 4.243325895370489e-05, + "loss": 0.7582, + "step": 7190 + }, + { + "epoch": 0.51, + "learning_rate": 4.2413324133865516e-05, + "loss": 0.7491, + "step": 7200 + }, + { + "epoch": 0.51, + "learning_rate": 4.239336778514287e-05, + "loss": 0.7751, + "step": 7210 + }, + { + "epoch": 0.51, + "learning_rate": 4.237338993220988e-05, + "loss": 0.7497, + "step": 7220 + }, + { + "epoch": 0.51, + "learning_rate": 4.23533905997661e-05, + "loss": 0.7692, + "step": 7230 + }, + { + "epoch": 0.51, + "learning_rate": 4.2333369812537583e-05, + "loss": 0.7796, + "step": 7240 + }, + { + "epoch": 0.51, + "learning_rate": 4.231332759527695e-05, + "loss": 0.7387, + "step": 7250 + }, + { + "epoch": 0.51, + "learning_rate": 4.2293263972763295e-05, + "loss": 0.7472, + "step": 7260 + }, + { + "epoch": 0.51, + "learning_rate": 4.227317896980221e-05, + "loss": 0.7488, + "step": 7270 + }, + { + "epoch": 0.52, + "learning_rate": 4.225307261122568e-05, + "loss": 0.7418, + "step": 7280 + }, + { + "epoch": 0.52, + "learning_rate": 4.223294492189209e-05, + "loss": 0.7462, + "step": 7290 + }, + { + "epoch": 0.52, + "learning_rate": 4.2212795926686255e-05, + "loss": 0.7761, + "step": 7300 + }, + { + "epoch": 0.52, + "learning_rate": 4.2192625650519265e-05, + "loss": 0.7454, + "step": 7310 + }, + { + "epoch": 0.52, + "learning_rate": 4.217243411832856e-05, + "loss": 0.7579, + "step": 7320 + }, + { + "epoch": 0.52, + "learning_rate": 4.215222135507784e-05, + "loss": 0.773, + "step": 7330 + }, + { + "epoch": 0.52, + "learning_rate": 4.2131987385757066e-05, + "loss": 0.7655, + "step": 7340 + }, + { + "epoch": 0.52, + "learning_rate": 4.211173223538242e-05, + "loss": 0.7359, + "step": 7350 + }, + { + "epoch": 0.52, + "learning_rate": 4.209145592899625e-05, + "loss": 0.7741, + "step": 7360 + }, + { + "epoch": 0.52, + "learning_rate": 4.207115849166709e-05, + "loss": 0.7681, + "step": 7370 + }, + { + "epoch": 0.52, + "learning_rate": 4.2050839948489565e-05, + "loss": 0.7548, + "step": 7380 + }, + { + "epoch": 0.52, + "learning_rate": 4.203050032458443e-05, + "loss": 0.7798, + "step": 7390 + }, + { + "epoch": 0.52, + "learning_rate": 4.2010139645098476e-05, + "loss": 0.7405, + "step": 7400 + }, + { + "epoch": 0.52, + "learning_rate": 4.1989757935204535e-05, + "loss": 0.7491, + "step": 7410 + }, + { + "epoch": 0.53, + "learning_rate": 4.1969355220101446e-05, + "loss": 0.7777, + "step": 7420 + }, + { + "epoch": 0.53, + "learning_rate": 4.194893152501401e-05, + "loss": 0.7521, + "step": 7430 + }, + { + "epoch": 0.53, + "learning_rate": 4.192848687519296e-05, + "loss": 0.7891, + "step": 7440 + }, + { + "epoch": 0.53, + "learning_rate": 4.190802129591496e-05, + "loss": 0.768, + "step": 7450 + }, + { + "epoch": 0.53, + "learning_rate": 4.188753481248253e-05, + "loss": 0.7514, + "step": 7460 + }, + { + "epoch": 0.53, + "learning_rate": 4.186702745022403e-05, + "loss": 0.7322, + "step": 7470 + }, + { + "epoch": 0.53, + "learning_rate": 4.1846499234493655e-05, + "loss": 0.7411, + "step": 7480 + }, + { + "epoch": 0.53, + "learning_rate": 4.182595019067136e-05, + "loss": 0.743, + "step": 7490 + }, + { + "epoch": 0.53, + "learning_rate": 4.180538034416287e-05, + "loss": 0.7602, + "step": 7500 + }, + { + "epoch": 0.53, + "learning_rate": 4.178478972039961e-05, + "loss": 0.7293, + "step": 7510 + }, + { + "epoch": 0.53, + "learning_rate": 4.1764178344838716e-05, + "loss": 0.763, + "step": 7520 + }, + { + "epoch": 0.53, + "learning_rate": 4.174354624296296e-05, + "loss": 0.7368, + "step": 7530 + }, + { + "epoch": 0.53, + "learning_rate": 4.172289344028075e-05, + "loss": 0.7689, + "step": 7540 + }, + { + "epoch": 0.53, + "learning_rate": 4.170221996232607e-05, + "loss": 0.79, + "step": 7550 + }, + { + "epoch": 0.54, + "learning_rate": 4.16815258346585e-05, + "loss": 0.7563, + "step": 7560 + }, + { + "epoch": 0.54, + "learning_rate": 4.1660811082863115e-05, + "loss": 0.7594, + "step": 7570 + }, + { + "epoch": 0.54, + "learning_rate": 4.164007573255052e-05, + "loss": 0.7512, + "step": 7580 + }, + { + "epoch": 0.54, + "learning_rate": 4.161931980935675e-05, + "loss": 0.7693, + "step": 7590 + }, + { + "epoch": 0.54, + "learning_rate": 4.15985433389433e-05, + "loss": 0.7577, + "step": 7600 + }, + { + "epoch": 0.54, + "learning_rate": 4.157774634699707e-05, + "loss": 0.7549, + "step": 7610 + }, + { + "epoch": 0.54, + "learning_rate": 4.155692885923033e-05, + "loss": 0.7464, + "step": 7620 + }, + { + "epoch": 0.54, + "learning_rate": 4.1536090901380664e-05, + "loss": 0.7663, + "step": 7630 + }, + { + "epoch": 0.54, + "learning_rate": 4.151523249921101e-05, + "loss": 0.7683, + "step": 7640 + }, + { + "epoch": 0.54, + "learning_rate": 4.149435367850955e-05, + "loss": 0.7438, + "step": 7650 + }, + { + "epoch": 0.54, + "learning_rate": 4.14734544650897e-05, + "loss": 0.7332, + "step": 7660 + }, + { + "epoch": 0.54, + "learning_rate": 4.145253488479013e-05, + "loss": 0.7226, + "step": 7670 + }, + { + "epoch": 0.54, + "learning_rate": 4.143159496347466e-05, + "loss": 0.7398, + "step": 7680 + }, + { + "epoch": 0.54, + "learning_rate": 4.1410634727032264e-05, + "loss": 0.784, + "step": 7690 + }, + { + "epoch": 0.55, + "learning_rate": 4.138965420137704e-05, + "loss": 0.7534, + "step": 7700 + }, + { + "epoch": 0.55, + "learning_rate": 4.136865341244815e-05, + "loss": 0.746, + "step": 7710 + }, + { + "epoch": 0.55, + "learning_rate": 4.1347632386209834e-05, + "loss": 0.7369, + "step": 7720 + }, + { + "epoch": 0.55, + "learning_rate": 4.132659114865134e-05, + "loss": 0.7417, + "step": 7730 + }, + { + "epoch": 0.55, + "learning_rate": 4.13055297257869e-05, + "loss": 0.7658, + "step": 7740 + }, + { + "epoch": 0.55, + "learning_rate": 4.1284448143655716e-05, + "loss": 0.7414, + "step": 7750 + }, + { + "epoch": 0.55, + "learning_rate": 4.126334642832189e-05, + "loss": 0.7202, + "step": 7760 + }, + { + "epoch": 0.55, + "learning_rate": 4.1242224605874456e-05, + "loss": 0.7547, + "step": 7770 + }, + { + "epoch": 0.55, + "learning_rate": 4.122108270242726e-05, + "loss": 0.7254, + "step": 7780 + }, + { + "epoch": 0.55, + "learning_rate": 4.119992074411901e-05, + "loss": 0.7217, + "step": 7790 + }, + { + "epoch": 0.55, + "learning_rate": 4.1178738757113186e-05, + "loss": 0.7806, + "step": 7800 + }, + { + "epoch": 0.55, + "learning_rate": 4.115753676759805e-05, + "loss": 0.7418, + "step": 7810 + }, + { + "epoch": 0.55, + "learning_rate": 4.113631480178657e-05, + "loss": 0.7323, + "step": 7820 + }, + { + "epoch": 0.55, + "learning_rate": 4.111507288591645e-05, + "loss": 0.7351, + "step": 7830 + }, + { + "epoch": 0.55, + "learning_rate": 4.109381104625001e-05, + "loss": 0.7437, + "step": 7840 + }, + { + "epoch": 0.56, + "learning_rate": 4.1072529309074235e-05, + "loss": 0.7061, + "step": 7850 + }, + { + "epoch": 0.56, + "learning_rate": 4.105122770070071e-05, + "loss": 0.7358, + "step": 7860 + }, + { + "epoch": 0.56, + "learning_rate": 4.1029906247465576e-05, + "loss": 0.7275, + "step": 7870 + }, + { + "epoch": 0.56, + "learning_rate": 4.1008564975729514e-05, + "loss": 0.8013, + "step": 7880 + }, + { + "epoch": 0.56, + "learning_rate": 4.098720391187771e-05, + "loss": 0.7475, + "step": 7890 + }, + { + "epoch": 0.56, + "learning_rate": 4.096582308231981e-05, + "loss": 0.7264, + "step": 7900 + }, + { + "epoch": 0.56, + "learning_rate": 4.094442251348991e-05, + "loss": 0.7853, + "step": 7910 + }, + { + "epoch": 0.56, + "learning_rate": 4.092300223184651e-05, + "loss": 0.7747, + "step": 7920 + }, + { + "epoch": 0.56, + "learning_rate": 4.0901562263872465e-05, + "loss": 0.7651, + "step": 7930 + }, + { + "epoch": 0.56, + "learning_rate": 4.088010263607499e-05, + "loss": 0.7529, + "step": 7940 + }, + { + "epoch": 0.56, + "learning_rate": 4.08586233749856e-05, + "loss": 0.7526, + "step": 7950 + }, + { + "epoch": 0.56, + "learning_rate": 4.0837124507160064e-05, + "loss": 0.7322, + "step": 7960 + }, + { + "epoch": 0.56, + "learning_rate": 4.0815606059178423e-05, + "loss": 0.757, + "step": 7970 + }, + { + "epoch": 0.56, + "learning_rate": 4.0794068057644904e-05, + "loss": 0.7799, + "step": 7980 + }, + { + "epoch": 0.57, + "learning_rate": 4.0772510529187924e-05, + "loss": 0.7197, + "step": 7990 + }, + { + "epoch": 0.57, + "learning_rate": 4.0750933500460025e-05, + "loss": 0.7224, + "step": 8000 + }, + { + "epoch": 0.57, + "learning_rate": 4.072933699813788e-05, + "loss": 0.7208, + "step": 8010 + }, + { + "epoch": 0.57, + "learning_rate": 4.070772104892221e-05, + "loss": 0.7544, + "step": 8020 + }, + { + "epoch": 0.57, + "learning_rate": 4.068608567953781e-05, + "loss": 0.7631, + "step": 8030 + }, + { + "epoch": 0.57, + "learning_rate": 4.066443091673345e-05, + "loss": 0.7584, + "step": 8040 + }, + { + "epoch": 0.57, + "learning_rate": 4.064275678728191e-05, + "loss": 0.7454, + "step": 8050 + }, + { + "epoch": 0.57, + "learning_rate": 4.0621063317979904e-05, + "loss": 0.7882, + "step": 8060 + }, + { + "epoch": 0.57, + "learning_rate": 4.059935053564805e-05, + "loss": 0.7521, + "step": 8070 + }, + { + "epoch": 0.57, + "learning_rate": 4.057761846713084e-05, + "loss": 0.7452, + "step": 8080 + }, + { + "epoch": 0.57, + "learning_rate": 4.055586713929662e-05, + "loss": 0.7729, + "step": 8090 + }, + { + "epoch": 0.57, + "learning_rate": 4.053409657903755e-05, + "loss": 0.7471, + "step": 8100 + }, + { + "epoch": 0.57, + "learning_rate": 4.0512306813269555e-05, + "loss": 0.7553, + "step": 8110 + }, + { + "epoch": 0.57, + "learning_rate": 4.0490497868932306e-05, + "loss": 0.7342, + "step": 8120 + }, + { + "epoch": 0.58, + "learning_rate": 4.046866977298921e-05, + "loss": 0.7419, + "step": 8130 + }, + { + "epoch": 0.58, + "learning_rate": 4.044682255242732e-05, + "loss": 0.7688, + "step": 8140 + }, + { + "epoch": 0.58, + "learning_rate": 4.042495623425735e-05, + "loss": 0.7387, + "step": 8150 + }, + { + "epoch": 0.58, + "learning_rate": 4.040307084551362e-05, + "loss": 0.7394, + "step": 8160 + }, + { + "epoch": 0.58, + "learning_rate": 4.038116641325403e-05, + "loss": 0.7233, + "step": 8170 + }, + { + "epoch": 0.58, + "learning_rate": 4.035924296456003e-05, + "loss": 0.7869, + "step": 8180 + }, + { + "epoch": 0.58, + "learning_rate": 4.033730052653656e-05, + "loss": 0.7391, + "step": 8190 + }, + { + "epoch": 0.58, + "learning_rate": 4.031533912631207e-05, + "loss": 0.7531, + "step": 8200 + }, + { + "epoch": 0.58, + "learning_rate": 4.0293358791038426e-05, + "loss": 0.7616, + "step": 8210 + }, + { + "epoch": 0.58, + "learning_rate": 4.027135954789093e-05, + "loss": 0.7474, + "step": 8220 + }, + { + "epoch": 0.58, + "learning_rate": 4.024934142406822e-05, + "loss": 0.7436, + "step": 8230 + }, + { + "epoch": 0.58, + "learning_rate": 4.0227304446792313e-05, + "loss": 0.7671, + "step": 8240 + }, + { + "epoch": 0.58, + "learning_rate": 4.020524864330854e-05, + "loss": 0.7358, + "step": 8250 + }, + { + "epoch": 0.58, + "learning_rate": 4.018317404088546e-05, + "loss": 0.7542, + "step": 8260 + }, + { + "epoch": 0.59, + "learning_rate": 4.016108066681494e-05, + "loss": 0.7609, + "step": 8270 + }, + { + "epoch": 0.59, + "learning_rate": 4.0138968548412006e-05, + "loss": 0.7676, + "step": 8280 + }, + { + "epoch": 0.59, + "learning_rate": 4.011683771301486e-05, + "loss": 0.7197, + "step": 8290 + }, + { + "epoch": 0.59, + "learning_rate": 4.009468818798488e-05, + "loss": 0.7711, + "step": 8300 + }, + { + "epoch": 0.59, + "learning_rate": 4.007252000070653e-05, + "loss": 0.7477, + "step": 8310 + }, + { + "epoch": 0.59, + "learning_rate": 4.005033317858734e-05, + "loss": 0.7677, + "step": 8320 + }, + { + "epoch": 0.59, + "learning_rate": 4.002812774905788e-05, + "loss": 0.739, + "step": 8330 + }, + { + "epoch": 0.59, + "learning_rate": 4.0005903739571725e-05, + "loss": 0.7243, + "step": 8340 + }, + { + "epoch": 0.59, + "learning_rate": 3.998366117760545e-05, + "loss": 0.7648, + "step": 8350 + }, + { + "epoch": 0.59, + "learning_rate": 3.9961400090658526e-05, + "loss": 0.721, + "step": 8360 + }, + { + "epoch": 0.59, + "learning_rate": 3.993912050625336e-05, + "loss": 0.7516, + "step": 8370 + }, + { + "epoch": 0.59, + "learning_rate": 3.991682245193519e-05, + "loss": 0.7644, + "step": 8380 + }, + { + "epoch": 0.59, + "learning_rate": 3.989450595527214e-05, + "loss": 0.7364, + "step": 8390 + }, + { + "epoch": 0.59, + "learning_rate": 3.987217104385509e-05, + "loss": 0.7517, + "step": 8400 + }, + { + "epoch": 0.6, + "learning_rate": 3.984981774529771e-05, + "loss": 0.7686, + "step": 8410 + }, + { + "epoch": 0.6, + "learning_rate": 3.982744608723641e-05, + "loss": 0.7526, + "step": 8420 + }, + { + "epoch": 0.6, + "learning_rate": 3.980505609733027e-05, + "loss": 0.7468, + "step": 8430 + }, + { + "epoch": 0.6, + "learning_rate": 3.978264780326105e-05, + "loss": 0.7765, + "step": 8440 + }, + { + "epoch": 0.6, + "learning_rate": 3.976022123273316e-05, + "loss": 0.7367, + "step": 8450 + }, + { + "epoch": 0.6, + "learning_rate": 3.973777641347357e-05, + "loss": 0.732, + "step": 8460 + }, + { + "epoch": 0.6, + "learning_rate": 3.971531337323183e-05, + "loss": 0.7508, + "step": 8470 + }, + { + "epoch": 0.6, + "learning_rate": 3.969283213978003e-05, + "loss": 0.739, + "step": 8480 + }, + { + "epoch": 0.6, + "learning_rate": 3.967033274091273e-05, + "loss": 0.7511, + "step": 8490 + }, + { + "epoch": 0.6, + "learning_rate": 3.964781520444696e-05, + "loss": 0.7497, + "step": 8500 + }, + { + "epoch": 0.6, + "learning_rate": 3.962527955822217e-05, + "loss": 0.7393, + "step": 8510 + }, + { + "epoch": 0.6, + "learning_rate": 3.96027258301002e-05, + "loss": 0.7489, + "step": 8520 + }, + { + "epoch": 0.6, + "learning_rate": 3.958015404796526e-05, + "loss": 0.7484, + "step": 8530 + }, + { + "epoch": 0.6, + "learning_rate": 3.955756423972385e-05, + "loss": 0.7324, + "step": 8540 + }, + { + "epoch": 0.61, + "learning_rate": 3.9534956433304806e-05, + "loss": 0.7289, + "step": 8550 + }, + { + "epoch": 0.61, + "learning_rate": 3.9512330656659155e-05, + "loss": 0.7621, + "step": 8560 + }, + { + "epoch": 0.61, + "learning_rate": 3.9489686937760195e-05, + "loss": 0.7426, + "step": 8570 + }, + { + "epoch": 0.61, + "learning_rate": 3.946702530460337e-05, + "loss": 0.7531, + "step": 8580 + }, + { + "epoch": 0.61, + "learning_rate": 3.9444345785206285e-05, + "loss": 0.7292, + "step": 8590 + }, + { + "epoch": 0.61, + "learning_rate": 3.942164840760866e-05, + "loss": 0.7191, + "step": 8600 + }, + { + "epoch": 0.61, + "learning_rate": 3.93989331998723e-05, + "loss": 0.7325, + "step": 8610 + }, + { + "epoch": 0.61, + "learning_rate": 3.937620019008105e-05, + "loss": 0.7309, + "step": 8620 + }, + { + "epoch": 0.61, + "learning_rate": 3.9353449406340755e-05, + "loss": 0.7346, + "step": 8630 + }, + { + "epoch": 0.61, + "learning_rate": 3.933068087677924e-05, + "loss": 0.7604, + "step": 8640 + }, + { + "epoch": 0.61, + "learning_rate": 3.930789462954628e-05, + "loss": 0.7602, + "step": 8650 + }, + { + "epoch": 0.61, + "learning_rate": 3.9285090692813544e-05, + "loss": 0.7238, + "step": 8660 + }, + { + "epoch": 0.61, + "learning_rate": 3.9262269094774564e-05, + "loss": 0.7481, + "step": 8670 + }, + { + "epoch": 0.61, + "learning_rate": 3.9239429863644736e-05, + "loss": 0.7412, + "step": 8680 + }, + { + "epoch": 0.62, + "learning_rate": 3.921657302766123e-05, + "loss": 0.7643, + "step": 8690 + }, + { + "epoch": 0.62, + "learning_rate": 3.9193698615082995e-05, + "loss": 0.7115, + "step": 8700 + }, + { + "epoch": 0.62, + "learning_rate": 3.9170806654190695e-05, + "loss": 0.77, + "step": 8710 + }, + { + "epoch": 0.62, + "learning_rate": 3.914789717328671e-05, + "loss": 0.7304, + "step": 8720 + }, + { + "epoch": 0.62, + "learning_rate": 3.912497020069505e-05, + "loss": 0.7337, + "step": 8730 + }, + { + "epoch": 0.62, + "learning_rate": 3.910202576476142e-05, + "loss": 0.7589, + "step": 8740 + }, + { + "epoch": 0.62, + "learning_rate": 3.907906389385302e-05, + "loss": 0.733, + "step": 8750 + }, + { + "epoch": 0.62, + "learning_rate": 3.9056084616358666e-05, + "loss": 0.7525, + "step": 8760 + }, + { + "epoch": 0.62, + "learning_rate": 3.90330879606887e-05, + "loss": 0.7483, + "step": 8770 + }, + { + "epoch": 0.62, + "learning_rate": 3.9010073955274915e-05, + "loss": 0.7159, + "step": 8780 + }, + { + "epoch": 0.62, + "learning_rate": 3.898704262857057e-05, + "loss": 0.7235, + "step": 8790 + }, + { + "epoch": 0.62, + "learning_rate": 3.8963994009050356e-05, + "loss": 0.7327, + "step": 8800 + }, + { + "epoch": 0.62, + "learning_rate": 3.894092812521031e-05, + "loss": 0.7502, + "step": 8810 + }, + { + "epoch": 0.62, + "learning_rate": 3.891784500556784e-05, + "loss": 0.7344, + "step": 8820 + }, + { + "epoch": 0.63, + "learning_rate": 3.8894744678661655e-05, + "loss": 0.7401, + "step": 8830 + }, + { + "epoch": 0.63, + "learning_rate": 3.887162717305173e-05, + "loss": 0.7561, + "step": 8840 + }, + { + "epoch": 0.63, + "learning_rate": 3.88484925173193e-05, + "loss": 0.7565, + "step": 8850 + }, + { + "epoch": 0.63, + "learning_rate": 3.882534074006678e-05, + "loss": 0.7528, + "step": 8860 + }, + { + "epoch": 0.63, + "learning_rate": 3.8802171869917765e-05, + "loss": 0.7342, + "step": 8870 + }, + { + "epoch": 0.63, + "learning_rate": 3.8778985935516985e-05, + "loss": 0.7542, + "step": 8880 + }, + { + "epoch": 0.63, + "learning_rate": 3.8755782965530265e-05, + "loss": 0.7435, + "step": 8890 + }, + { + "epoch": 0.63, + "learning_rate": 3.873256298864448e-05, + "loss": 0.7558, + "step": 8900 + }, + { + "epoch": 0.63, + "learning_rate": 3.870932603356755e-05, + "loss": 0.7552, + "step": 8910 + }, + { + "epoch": 0.63, + "learning_rate": 3.8686072129028385e-05, + "loss": 0.7223, + "step": 8920 + }, + { + "epoch": 0.63, + "learning_rate": 3.866280130377682e-05, + "loss": 0.7385, + "step": 8930 + }, + { + "epoch": 0.63, + "learning_rate": 3.8639513586583656e-05, + "loss": 0.7372, + "step": 8940 + }, + { + "epoch": 0.63, + "learning_rate": 3.861620900624054e-05, + "loss": 0.7408, + "step": 8950 + }, + { + "epoch": 0.63, + "learning_rate": 3.859288759156e-05, + "loss": 0.7633, + "step": 8960 + }, + { + "epoch": 0.63, + "learning_rate": 3.8569549371375346e-05, + "loss": 0.7412, + "step": 8970 + }, + { + "epoch": 0.64, + "learning_rate": 3.854619437454068e-05, + "loss": 0.7195, + "step": 8980 + }, + { + "epoch": 0.64, + "learning_rate": 3.8522822629930844e-05, + "loss": 0.7281, + "step": 8990 + }, + { + "epoch": 0.64, + "learning_rate": 3.849943416644139e-05, + "loss": 0.7029, + "step": 9000 + }, + { + "epoch": 0.64, + "learning_rate": 3.847602901298854e-05, + "loss": 0.7543, + "step": 9010 + }, + { + "epoch": 0.64, + "learning_rate": 3.845260719850915e-05, + "loss": 0.7569, + "step": 9020 + }, + { + "epoch": 0.64, + "learning_rate": 3.842916875196066e-05, + "loss": 0.7212, + "step": 9030 + }, + { + "epoch": 0.64, + "learning_rate": 3.84057137023211e-05, + "loss": 0.734, + "step": 9040 + }, + { + "epoch": 0.64, + "learning_rate": 3.8382242078589006e-05, + "loss": 0.7038, + "step": 9050 + }, + { + "epoch": 0.64, + "learning_rate": 3.8358753909783405e-05, + "loss": 0.7444, + "step": 9060 + }, + { + "epoch": 0.64, + "learning_rate": 3.83352492249438e-05, + "loss": 0.7663, + "step": 9070 + }, + { + "epoch": 0.64, + "learning_rate": 3.831172805313009e-05, + "loss": 0.7659, + "step": 9080 + }, + { + "epoch": 0.64, + "learning_rate": 3.8288190423422585e-05, + "loss": 0.7406, + "step": 9090 + }, + { + "epoch": 0.64, + "learning_rate": 3.8264636364921904e-05, + "loss": 0.7292, + "step": 9100 + }, + { + "epoch": 0.64, + "learning_rate": 3.824106590674901e-05, + "loss": 0.7383, + "step": 9110 + }, + { + "epoch": 0.65, + "learning_rate": 3.821747907804513e-05, + "loss": 0.7222, + "step": 9120 + }, + { + "epoch": 0.65, + "learning_rate": 3.819387590797172e-05, + "loss": 0.7535, + "step": 9130 + }, + { + "epoch": 0.65, + "learning_rate": 3.817025642571046e-05, + "loss": 0.7512, + "step": 9140 + }, + { + "epoch": 0.65, + "learning_rate": 3.814662066046319e-05, + "loss": 0.7285, + "step": 9150 + }, + { + "epoch": 0.65, + "learning_rate": 3.81229686414519e-05, + "loss": 0.7604, + "step": 9160 + }, + { + "epoch": 0.65, + "learning_rate": 3.8099300397918606e-05, + "loss": 0.7449, + "step": 9170 + }, + { + "epoch": 0.65, + "learning_rate": 3.8075615959125465e-05, + "loss": 0.7395, + "step": 9180 + }, + { + "epoch": 0.65, + "learning_rate": 3.805191535435463e-05, + "loss": 0.7444, + "step": 9190 + }, + { + "epoch": 0.65, + "learning_rate": 3.802819861290822e-05, + "loss": 0.7471, + "step": 9200 + }, + { + "epoch": 0.65, + "learning_rate": 3.800446576410831e-05, + "loss": 0.7874, + "step": 9210 + }, + { + "epoch": 0.65, + "learning_rate": 3.7980716837296924e-05, + "loss": 0.7581, + "step": 9220 + }, + { + "epoch": 0.65, + "learning_rate": 3.795695186183592e-05, + "loss": 0.7719, + "step": 9230 + }, + { + "epoch": 0.65, + "learning_rate": 3.793317086710703e-05, + "loss": 0.7324, + "step": 9240 + }, + { + "epoch": 0.65, + "learning_rate": 3.790937388251176e-05, + "loss": 0.752, + "step": 9250 + }, + { + "epoch": 0.66, + "learning_rate": 3.788556093747142e-05, + "loss": 0.7395, + "step": 9260 + }, + { + "epoch": 0.66, + "learning_rate": 3.7861732061427024e-05, + "loss": 0.7337, + "step": 9270 + }, + { + "epoch": 0.66, + "learning_rate": 3.783788728383929e-05, + "loss": 0.7559, + "step": 9280 + }, + { + "epoch": 0.66, + "learning_rate": 3.7814026634188616e-05, + "loss": 0.7456, + "step": 9290 + }, + { + "epoch": 0.66, + "learning_rate": 3.779015014197499e-05, + "loss": 0.7293, + "step": 9300 + }, + { + "epoch": 0.66, + "learning_rate": 3.776625783671802e-05, + "loss": 0.7386, + "step": 9310 + }, + { + "epoch": 0.66, + "learning_rate": 3.774234974795683e-05, + "loss": 0.711, + "step": 9320 + }, + { + "epoch": 0.66, + "learning_rate": 3.771842590525008e-05, + "loss": 0.7369, + "step": 9330 + }, + { + "epoch": 0.66, + "learning_rate": 3.769448633817591e-05, + "loss": 0.7446, + "step": 9340 + }, + { + "epoch": 0.66, + "learning_rate": 3.7670531076331895e-05, + "loss": 0.7554, + "step": 9350 + }, + { + "epoch": 0.66, + "learning_rate": 3.7646560149334995e-05, + "loss": 0.7632, + "step": 9360 + }, + { + "epoch": 0.66, + "learning_rate": 3.762257358682158e-05, + "loss": 0.7249, + "step": 9370 + }, + { + "epoch": 0.66, + "learning_rate": 3.759857141844732e-05, + "loss": 0.7343, + "step": 9380 + }, + { + "epoch": 0.66, + "learning_rate": 3.7574553673887164e-05, + "loss": 0.747, + "step": 9390 + }, + { + "epoch": 0.67, + "learning_rate": 3.7550520382835365e-05, + "loss": 0.7378, + "step": 9400 + }, + { + "epoch": 0.67, + "learning_rate": 3.752647157500536e-05, + "loss": 0.7587, + "step": 9410 + }, + { + "epoch": 0.67, + "learning_rate": 3.750240728012979e-05, + "loss": 0.7305, + "step": 9420 + }, + { + "epoch": 0.67, + "learning_rate": 3.7478327527960424e-05, + "loss": 0.7188, + "step": 9430 + }, + { + "epoch": 0.67, + "learning_rate": 3.745423234826817e-05, + "loss": 0.7295, + "step": 9440 + }, + { + "epoch": 0.67, + "learning_rate": 3.7430121770842974e-05, + "loss": 0.7137, + "step": 9450 + }, + { + "epoch": 0.67, + "learning_rate": 3.7405995825493855e-05, + "loss": 0.7619, + "step": 9460 + }, + { + "epoch": 0.67, + "learning_rate": 3.73818545420488e-05, + "loss": 0.7388, + "step": 9470 + }, + { + "epoch": 0.67, + "learning_rate": 3.735769795035477e-05, + "loss": 0.7496, + "step": 9480 + }, + { + "epoch": 0.67, + "learning_rate": 3.733352608027768e-05, + "loss": 0.7716, + "step": 9490 + }, + { + "epoch": 0.67, + "learning_rate": 3.730933896170229e-05, + "loss": 0.7513, + "step": 9500 + }, + { + "epoch": 0.67, + "learning_rate": 3.7285136624532244e-05, + "loss": 0.7472, + "step": 9510 + }, + { + "epoch": 0.67, + "learning_rate": 3.726091909868998e-05, + "loss": 0.726, + "step": 9520 + }, + { + "epoch": 0.67, + "learning_rate": 3.7236686414116736e-05, + "loss": 0.728, + "step": 9530 + }, + { + "epoch": 0.68, + "learning_rate": 3.721243860077247e-05, + "loss": 0.7283, + "step": 9540 + }, + { + "epoch": 0.68, + "learning_rate": 3.718817568863586e-05, + "loss": 0.7674, + "step": 9550 + }, + { + "epoch": 0.68, + "learning_rate": 3.7163897707704244e-05, + "loss": 0.738, + "step": 9560 + }, + { + "epoch": 0.68, + "learning_rate": 3.71396046879936e-05, + "loss": 0.7461, + "step": 9570 + }, + { + "epoch": 0.68, + "learning_rate": 3.711529665953847e-05, + "loss": 0.7427, + "step": 9580 + }, + { + "epoch": 0.68, + "learning_rate": 3.7090973652392e-05, + "loss": 0.7268, + "step": 9590 + }, + { + "epoch": 0.68, + "learning_rate": 3.706663569662581e-05, + "loss": 0.7508, + "step": 9600 + }, + { + "epoch": 0.68, + "learning_rate": 3.704228282233003e-05, + "loss": 0.7623, + "step": 9610 + }, + { + "epoch": 0.68, + "learning_rate": 3.7017915059613214e-05, + "loss": 0.7626, + "step": 9620 + }, + { + "epoch": 0.68, + "learning_rate": 3.699353243860235e-05, + "loss": 0.7394, + "step": 9630 + }, + { + "epoch": 0.68, + "learning_rate": 3.696913498944276e-05, + "loss": 0.7422, + "step": 9640 + }, + { + "epoch": 0.68, + "learning_rate": 3.6944722742298135e-05, + "loss": 0.7552, + "step": 9650 + }, + { + "epoch": 0.68, + "learning_rate": 3.692029572735042e-05, + "loss": 0.6867, + "step": 9660 + }, + { + "epoch": 0.68, + "learning_rate": 3.6895853974799876e-05, + "loss": 0.7644, + "step": 9670 + }, + { + "epoch": 0.69, + "learning_rate": 3.6871397514864924e-05, + "loss": 0.7547, + "step": 9680 + }, + { + "epoch": 0.69, + "learning_rate": 3.6846926377782216e-05, + "loss": 0.7313, + "step": 9690 + }, + { + "epoch": 0.69, + "learning_rate": 3.682244059380651e-05, + "loss": 0.7643, + "step": 9700 + }, + { + "epoch": 0.69, + "learning_rate": 3.6797940193210714e-05, + "loss": 0.7561, + "step": 9710 + }, + { + "epoch": 0.69, + "learning_rate": 3.6773425206285765e-05, + "loss": 0.7326, + "step": 9720 + }, + { + "epoch": 0.69, + "learning_rate": 3.674889566334067e-05, + "loss": 0.7435, + "step": 9730 + }, + { + "epoch": 0.69, + "learning_rate": 3.6724351594702404e-05, + "loss": 0.7259, + "step": 9740 + }, + { + "epoch": 0.69, + "learning_rate": 3.6699793030715933e-05, + "loss": 0.7106, + "step": 9750 + }, + { + "epoch": 0.69, + "learning_rate": 3.66752200017441e-05, + "loss": 0.7552, + "step": 9760 + }, + { + "epoch": 0.69, + "learning_rate": 3.6650632538167674e-05, + "loss": 0.7305, + "step": 9770 + }, + { + "epoch": 0.69, + "learning_rate": 3.662603067038524e-05, + "loss": 0.7236, + "step": 9780 + }, + { + "epoch": 0.69, + "learning_rate": 3.660141442881322e-05, + "loss": 0.7464, + "step": 9790 + }, + { + "epoch": 0.69, + "learning_rate": 3.657678384388578e-05, + "loss": 0.7186, + "step": 9800 + }, + { + "epoch": 0.69, + "learning_rate": 3.655213894605483e-05, + "loss": 0.7587, + "step": 9810 + }, + { + "epoch": 0.7, + "learning_rate": 3.652747976578998e-05, + "loss": 0.7431, + "step": 9820 + }, + { + "epoch": 0.7, + "learning_rate": 3.650280633357849e-05, + "loss": 0.7776, + "step": 9830 + }, + { + "epoch": 0.7, + "learning_rate": 3.6478118679925254e-05, + "loss": 0.7266, + "step": 9840 + }, + { + "epoch": 0.7, + "learning_rate": 3.6453416835352725e-05, + "loss": 0.7521, + "step": 9850 + }, + { + "epoch": 0.7, + "learning_rate": 3.642870083040093e-05, + "loss": 0.7532, + "step": 9860 + }, + { + "epoch": 0.7, + "learning_rate": 3.6403970695627384e-05, + "loss": 0.7215, + "step": 9870 + }, + { + "epoch": 0.7, + "learning_rate": 3.637922646160706e-05, + "loss": 0.7475, + "step": 9880 + }, + { + "epoch": 0.7, + "learning_rate": 3.6354468158932395e-05, + "loss": 0.757, + "step": 9890 + }, + { + "epoch": 0.7, + "learning_rate": 3.632969581821321e-05, + "loss": 0.7066, + "step": 9900 + }, + { + "epoch": 0.7, + "learning_rate": 3.6304909470076645e-05, + "loss": 0.7627, + "step": 9910 + }, + { + "epoch": 0.7, + "learning_rate": 3.628010914516723e-05, + "loss": 0.7341, + "step": 9920 + }, + { + "epoch": 0.7, + "learning_rate": 3.6255294874146684e-05, + "loss": 0.7256, + "step": 9930 + }, + { + "epoch": 0.7, + "learning_rate": 3.6230466687694054e-05, + "loss": 0.7241, + "step": 9940 + }, + { + "epoch": 0.7, + "learning_rate": 3.620562461650553e-05, + "loss": 0.7269, + "step": 9950 + }, + { + "epoch": 0.7, + "learning_rate": 3.618076869129452e-05, + "loss": 0.7487, + "step": 9960 + }, + { + "epoch": 0.71, + "learning_rate": 3.61558989427915e-05, + "loss": 0.735, + "step": 9970 + }, + { + "epoch": 0.71, + "learning_rate": 3.61310154017441e-05, + "loss": 0.7476, + "step": 9980 + }, + { + "epoch": 0.71, + "learning_rate": 3.6106118098916954e-05, + "loss": 0.7394, + "step": 9990 + }, + { + "epoch": 0.71, + "learning_rate": 3.608120706509173e-05, + "loss": 0.7288, + "step": 10000 + }, + { + "epoch": 0.71, + "learning_rate": 3.605628233106707e-05, + "loss": 0.7491, + "step": 10010 + }, + { + "epoch": 0.71, + "learning_rate": 3.6031343927658564e-05, + "loss": 0.7687, + "step": 10020 + }, + { + "epoch": 0.71, + "learning_rate": 3.600639188569868e-05, + "loss": 0.7579, + "step": 10030 + }, + { + "epoch": 0.71, + "learning_rate": 3.598142623603676e-05, + "loss": 0.7054, + "step": 10040 + }, + { + "epoch": 0.71, + "learning_rate": 3.595644700953898e-05, + "loss": 0.7501, + "step": 10050 + }, + { + "epoch": 0.71, + "learning_rate": 3.5931454237088283e-05, + "loss": 0.713, + "step": 10060 + }, + { + "epoch": 0.71, + "learning_rate": 3.590644794958438e-05, + "loss": 0.735, + "step": 10070 + }, + { + "epoch": 0.71, + "learning_rate": 3.5881428177943674e-05, + "loss": 0.7051, + "step": 10080 + }, + { + "epoch": 0.71, + "learning_rate": 3.5856394953099234e-05, + "loss": 0.75, + "step": 10090 + }, + { + "epoch": 0.71, + "learning_rate": 3.583134830600079e-05, + "loss": 0.7514, + "step": 10100 + }, + { + "epoch": 0.72, + "learning_rate": 3.5806288267614636e-05, + "loss": 0.7233, + "step": 10110 + }, + { + "epoch": 0.72, + "learning_rate": 3.5781214868923633e-05, + "loss": 0.7099, + "step": 10120 + }, + { + "epoch": 0.72, + "learning_rate": 3.575612814092718e-05, + "loss": 0.7144, + "step": 10130 + }, + { + "epoch": 0.72, + "learning_rate": 3.5731028114641116e-05, + "loss": 0.7626, + "step": 10140 + }, + { + "epoch": 0.72, + "learning_rate": 3.570591482109777e-05, + "loss": 0.7193, + "step": 10150 + }, + { + "epoch": 0.72, + "learning_rate": 3.568078829134582e-05, + "loss": 0.737, + "step": 10160 + }, + { + "epoch": 0.72, + "learning_rate": 3.5655648556450356e-05, + "loss": 0.7606, + "step": 10170 + }, + { + "epoch": 0.72, + "learning_rate": 3.563049564749275e-05, + "loss": 0.7435, + "step": 10180 + }, + { + "epoch": 0.72, + "learning_rate": 3.5605329595570714e-05, + "loss": 0.7496, + "step": 10190 + }, + { + "epoch": 0.72, + "learning_rate": 3.558015043179816e-05, + "loss": 0.7282, + "step": 10200 + }, + { + "epoch": 0.72, + "learning_rate": 3.555495818730524e-05, + "loss": 0.7563, + "step": 10210 + }, + { + "epoch": 0.72, + "learning_rate": 3.5529752893238264e-05, + "loss": 0.7196, + "step": 10220 + }, + { + "epoch": 0.72, + "learning_rate": 3.5504534580759695e-05, + "loss": 0.761, + "step": 10230 + }, + { + "epoch": 0.72, + "learning_rate": 3.547930328104806e-05, + "loss": 0.7364, + "step": 10240 + }, + { + "epoch": 0.73, + "learning_rate": 3.545405902529797e-05, + "loss": 0.7307, + "step": 10250 + }, + { + "epoch": 0.73, + "learning_rate": 3.542880184472004e-05, + "loss": 0.7517, + "step": 10260 + }, + { + "epoch": 0.73, + "learning_rate": 3.540353177054088e-05, + "loss": 0.7236, + "step": 10270 + }, + { + "epoch": 0.73, + "learning_rate": 3.5378248834003017e-05, + "loss": 0.73, + "step": 10280 + }, + { + "epoch": 0.73, + "learning_rate": 3.535295306636489e-05, + "loss": 0.7336, + "step": 10290 + }, + { + "epoch": 0.73, + "learning_rate": 3.5327644498900824e-05, + "loss": 0.7248, + "step": 10300 + }, + { + "epoch": 0.73, + "learning_rate": 3.530232316290094e-05, + "loss": 0.7291, + "step": 10310 + }, + { + "epoch": 0.73, + "learning_rate": 3.5276989089671154e-05, + "loss": 0.7609, + "step": 10320 + }, + { + "epoch": 0.73, + "learning_rate": 3.5251642310533135e-05, + "loss": 0.7445, + "step": 10330 + }, + { + "epoch": 0.73, + "learning_rate": 3.522628285682425e-05, + "loss": 0.7711, + "step": 10340 + }, + { + "epoch": 0.73, + "learning_rate": 3.520091075989755e-05, + "loss": 0.7469, + "step": 10350 + }, + { + "epoch": 0.73, + "learning_rate": 3.517552605112171e-05, + "loss": 0.7453, + "step": 10360 + }, + { + "epoch": 0.73, + "learning_rate": 3.515012876188099e-05, + "loss": 0.726, + "step": 10370 + }, + { + "epoch": 0.73, + "learning_rate": 3.512471892357522e-05, + "loss": 0.7439, + "step": 10380 + }, + { + "epoch": 0.74, + "learning_rate": 3.509929656761973e-05, + "loss": 0.7299, + "step": 10390 + }, + { + "epoch": 0.74, + "learning_rate": 3.507386172544534e-05, + "loss": 0.7795, + "step": 10400 + }, + { + "epoch": 0.74, + "learning_rate": 3.50484144284983e-05, + "loss": 0.7389, + "step": 10410 + }, + { + "epoch": 0.74, + "learning_rate": 3.502295470824026e-05, + "loss": 0.7409, + "step": 10420 + }, + { + "epoch": 0.74, + "learning_rate": 3.4997482596148215e-05, + "loss": 0.7453, + "step": 10430 + }, + { + "epoch": 0.74, + "learning_rate": 3.497199812371451e-05, + "loss": 0.7331, + "step": 10440 + }, + { + "epoch": 0.74, + "learning_rate": 3.4946501322446745e-05, + "loss": 0.7345, + "step": 10450 + }, + { + "epoch": 0.74, + "learning_rate": 3.4920992223867784e-05, + "loss": 0.7448, + "step": 10460 + }, + { + "epoch": 0.74, + "learning_rate": 3.489547085951567e-05, + "loss": 0.7118, + "step": 10470 + }, + { + "epoch": 0.74, + "learning_rate": 3.486993726094363e-05, + "loss": 0.741, + "step": 10480 + }, + { + "epoch": 0.74, + "learning_rate": 3.4844391459720014e-05, + "loss": 0.708, + "step": 10490 + }, + { + "epoch": 0.74, + "learning_rate": 3.481883348742826e-05, + "loss": 0.7703, + "step": 10500 + }, + { + "epoch": 0.74, + "learning_rate": 3.479326337566683e-05, + "loss": 0.7467, + "step": 10510 + }, + { + "epoch": 0.74, + "learning_rate": 3.4767681156049236e-05, + "loss": 0.7501, + "step": 10520 + }, + { + "epoch": 0.75, + "learning_rate": 3.4742086860203926e-05, + "loss": 0.764, + "step": 10530 + }, + { + "epoch": 0.75, + "learning_rate": 3.47164805197743e-05, + "loss": 0.7412, + "step": 10540 + }, + { + "epoch": 0.75, + "learning_rate": 3.469086216641863e-05, + "loss": 0.7403, + "step": 10550 + }, + { + "epoch": 0.75, + "learning_rate": 3.466523183181005e-05, + "loss": 0.7317, + "step": 10560 + }, + { + "epoch": 0.75, + "learning_rate": 3.463958954763652e-05, + "loss": 0.7539, + "step": 10570 + }, + { + "epoch": 0.75, + "learning_rate": 3.461393534560073e-05, + "loss": 0.7554, + "step": 10580 + }, + { + "epoch": 0.75, + "learning_rate": 3.458826925742017e-05, + "loss": 0.7161, + "step": 10590 + }, + { + "epoch": 0.75, + "learning_rate": 3.456259131482696e-05, + "loss": 0.7023, + "step": 10600 + }, + { + "epoch": 0.75, + "learning_rate": 3.453690154956793e-05, + "loss": 0.7644, + "step": 10610 + }, + { + "epoch": 0.75, + "learning_rate": 3.4511199993404496e-05, + "loss": 0.7552, + "step": 10620 + }, + { + "epoch": 0.75, + "learning_rate": 3.448548667811265e-05, + "loss": 0.7156, + "step": 10630 + }, + { + "epoch": 0.75, + "learning_rate": 3.445976163548294e-05, + "loss": 0.7464, + "step": 10640 + }, + { + "epoch": 0.75, + "learning_rate": 3.443402489732041e-05, + "loss": 0.7252, + "step": 10650 + }, + { + "epoch": 0.75, + "learning_rate": 3.4408276495444534e-05, + "loss": 0.7355, + "step": 10660 + }, + { + "epoch": 0.76, + "learning_rate": 3.438251646168926e-05, + "loss": 0.7304, + "step": 10670 + }, + { + "epoch": 0.76, + "learning_rate": 3.435674482790287e-05, + "loss": 0.7544, + "step": 10680 + }, + { + "epoch": 0.76, + "learning_rate": 3.433096162594801e-05, + "loss": 0.7299, + "step": 10690 + }, + { + "epoch": 0.76, + "learning_rate": 3.430516688770161e-05, + "loss": 0.7387, + "step": 10700 + }, + { + "epoch": 0.76, + "learning_rate": 3.4279360645054905e-05, + "loss": 0.7235, + "step": 10710 + }, + { + "epoch": 0.76, + "learning_rate": 3.425354292991329e-05, + "loss": 0.7559, + "step": 10720 + }, + { + "epoch": 0.76, + "learning_rate": 3.4227713774196415e-05, + "loss": 0.7226, + "step": 10730 + }, + { + "epoch": 0.76, + "learning_rate": 3.4201873209838e-05, + "loss": 0.7245, + "step": 10740 + }, + { + "epoch": 0.76, + "learning_rate": 3.417602126878593e-05, + "loss": 0.7257, + "step": 10750 + }, + { + "epoch": 0.76, + "learning_rate": 3.415015798300214e-05, + "loss": 0.7327, + "step": 10760 + }, + { + "epoch": 0.76, + "learning_rate": 3.412428338446257e-05, + "loss": 0.7503, + "step": 10770 + }, + { + "epoch": 0.76, + "learning_rate": 3.409839750515717e-05, + "loss": 0.7504, + "step": 10780 + }, + { + "epoch": 0.76, + "learning_rate": 3.407250037708982e-05, + "loss": 0.716, + "step": 10790 + }, + { + "epoch": 0.76, + "learning_rate": 3.404659203227832e-05, + "loss": 0.7614, + "step": 10800 + }, + { + "epoch": 0.77, + "learning_rate": 3.4020672502754333e-05, + "loss": 0.7691, + "step": 10810 + }, + { + "epoch": 0.77, + "learning_rate": 3.3994741820563344e-05, + "loss": 0.7403, + "step": 10820 + }, + { + "epoch": 0.77, + "learning_rate": 3.3968800017764645e-05, + "loss": 0.7404, + "step": 10830 + }, + { + "epoch": 0.77, + "learning_rate": 3.394284712643126e-05, + "loss": 0.7394, + "step": 10840 + }, + { + "epoch": 0.77, + "learning_rate": 3.391688317864992e-05, + "loss": 0.7452, + "step": 10850 + }, + { + "epoch": 0.77, + "learning_rate": 3.389090820652104e-05, + "loss": 0.7121, + "step": 10860 + }, + { + "epoch": 0.77, + "learning_rate": 3.386492224215865e-05, + "loss": 0.7231, + "step": 10870 + }, + { + "epoch": 0.77, + "learning_rate": 3.383892531769039e-05, + "loss": 0.7617, + "step": 10880 + }, + { + "epoch": 0.77, + "learning_rate": 3.381291746525742e-05, + "loss": 0.7573, + "step": 10890 + }, + { + "epoch": 0.77, + "learning_rate": 3.378689871701445e-05, + "loss": 0.7483, + "step": 10900 + }, + { + "epoch": 0.77, + "learning_rate": 3.376086910512962e-05, + "loss": 0.742, + "step": 10910 + }, + { + "epoch": 0.77, + "learning_rate": 3.3734828661784535e-05, + "loss": 0.7302, + "step": 10920 + }, + { + "epoch": 0.77, + "learning_rate": 3.370877741917418e-05, + "loss": 0.6999, + "step": 10930 + }, + { + "epoch": 0.77, + "learning_rate": 3.368271540950687e-05, + "loss": 0.7196, + "step": 10940 + }, + { + "epoch": 0.78, + "learning_rate": 3.365664266500426e-05, + "loss": 0.7372, + "step": 10950 + }, + { + "epoch": 0.78, + "learning_rate": 3.363055921790128e-05, + "loss": 0.768, + "step": 10960 + }, + { + "epoch": 0.78, + "learning_rate": 3.3604465100446064e-05, + "loss": 0.7356, + "step": 10970 + }, + { + "epoch": 0.78, + "learning_rate": 3.3578360344899965e-05, + "loss": 0.7345, + "step": 10980 + }, + { + "epoch": 0.78, + "learning_rate": 3.355224498353747e-05, + "loss": 0.708, + "step": 10990 + }, + { + "epoch": 0.78, + "learning_rate": 3.3526119048646196e-05, + "loss": 0.7387, + "step": 11000 + }, + { + "epoch": 0.78, + "learning_rate": 3.349998257252681e-05, + "loss": 0.7346, + "step": 11010 + }, + { + "epoch": 0.78, + "learning_rate": 3.347383558749303e-05, + "loss": 0.7535, + "step": 11020 + }, + { + "epoch": 0.78, + "learning_rate": 3.344767812587157e-05, + "loss": 0.7271, + "step": 11030 + }, + { + "epoch": 0.78, + "learning_rate": 3.342151022000207e-05, + "loss": 0.7259, + "step": 11040 + }, + { + "epoch": 0.78, + "learning_rate": 3.339533190223711e-05, + "loss": 0.7319, + "step": 11050 + }, + { + "epoch": 0.78, + "learning_rate": 3.3369143204942125e-05, + "loss": 0.7324, + "step": 11060 + }, + { + "epoch": 0.78, + "learning_rate": 3.3342944160495406e-05, + "loss": 0.7375, + "step": 11070 + }, + { + "epoch": 0.78, + "learning_rate": 3.331673480128801e-05, + "loss": 0.7354, + "step": 11080 + }, + { + "epoch": 0.78, + "learning_rate": 3.329051515972376e-05, + "loss": 0.7361, + "step": 11090 + }, + { + "epoch": 0.79, + "learning_rate": 3.326428526821919e-05, + "loss": 0.7464, + "step": 11100 + }, + { + "epoch": 0.79, + "learning_rate": 3.3238045159203494e-05, + "loss": 0.7313, + "step": 11110 + }, + { + "epoch": 0.79, + "learning_rate": 3.321179486511853e-05, + "loss": 0.7223, + "step": 11120 + }, + { + "epoch": 0.79, + "learning_rate": 3.318553441841872e-05, + "loss": 0.7402, + "step": 11130 + }, + { + "epoch": 0.79, + "learning_rate": 3.315926385157105e-05, + "loss": 0.7253, + "step": 11140 + }, + { + "epoch": 0.79, + "learning_rate": 3.313298319705501e-05, + "loss": 0.726, + "step": 11150 + }, + { + "epoch": 0.79, + "learning_rate": 3.3106692487362555e-05, + "loss": 0.7543, + "step": 11160 + }, + { + "epoch": 0.79, + "learning_rate": 3.3080391754998106e-05, + "loss": 0.728, + "step": 11170 + }, + { + "epoch": 0.79, + "learning_rate": 3.305408103247845e-05, + "loss": 0.7323, + "step": 11180 + }, + { + "epoch": 0.79, + "learning_rate": 3.3027760352332705e-05, + "loss": 0.7665, + "step": 11190 + }, + { + "epoch": 0.79, + "learning_rate": 3.300142974710234e-05, + "loss": 0.7486, + "step": 11200 + }, + { + "epoch": 0.79, + "learning_rate": 3.297508924934108e-05, + "loss": 0.7451, + "step": 11210 + }, + { + "epoch": 0.79, + "learning_rate": 3.2948738891614876e-05, + "loss": 0.7647, + "step": 11220 + }, + { + "epoch": 0.79, + "learning_rate": 3.292237870650187e-05, + "loss": 0.7415, + "step": 11230 + }, + { + "epoch": 0.8, + "learning_rate": 3.289600872659235e-05, + "loss": 0.746, + "step": 11240 + }, + { + "epoch": 0.8, + "learning_rate": 3.286962898448873e-05, + "loss": 0.7256, + "step": 11250 + }, + { + "epoch": 0.8, + "learning_rate": 3.284323951280547e-05, + "loss": 0.745, + "step": 11260 + }, + { + "epoch": 0.8, + "learning_rate": 3.281684034416909e-05, + "loss": 0.7154, + "step": 11270 + }, + { + "epoch": 0.8, + "learning_rate": 3.2790431511218064e-05, + "loss": 0.7422, + "step": 11280 + }, + { + "epoch": 0.8, + "learning_rate": 3.276401304660284e-05, + "loss": 0.7168, + "step": 11290 + }, + { + "epoch": 0.8, + "learning_rate": 3.2737584982985766e-05, + "loss": 0.7441, + "step": 11300 + }, + { + "epoch": 0.8, + "learning_rate": 3.271114735304105e-05, + "loss": 0.7541, + "step": 11310 + }, + { + "epoch": 0.8, + "learning_rate": 3.2684700189454744e-05, + "loss": 0.7001, + "step": 11320 + }, + { + "epoch": 0.8, + "learning_rate": 3.265824352492467e-05, + "loss": 0.7379, + "step": 11330 + }, + { + "epoch": 0.8, + "learning_rate": 3.2631777392160403e-05, + "loss": 0.72, + "step": 11340 + }, + { + "epoch": 0.8, + "learning_rate": 3.2605301823883226e-05, + "loss": 0.7386, + "step": 11350 + }, + { + "epoch": 0.8, + "learning_rate": 3.257881685282609e-05, + "loss": 0.7074, + "step": 11360 + }, + { + "epoch": 0.8, + "learning_rate": 3.255232251173357e-05, + "loss": 0.7308, + "step": 11370 + }, + { + "epoch": 0.81, + "learning_rate": 3.252581883336181e-05, + "loss": 0.7069, + "step": 11380 + }, + { + "epoch": 0.81, + "learning_rate": 3.249930585047852e-05, + "loss": 0.7334, + "step": 11390 + }, + { + "epoch": 0.81, + "learning_rate": 3.2472783595862896e-05, + "loss": 0.7444, + "step": 11400 + }, + { + "epoch": 0.81, + "learning_rate": 3.2446252102305625e-05, + "loss": 0.7503, + "step": 11410 + }, + { + "epoch": 0.81, + "learning_rate": 3.2419711402608774e-05, + "loss": 0.7331, + "step": 11420 + }, + { + "epoch": 0.81, + "learning_rate": 3.2393161529585836e-05, + "loss": 0.7449, + "step": 11430 + }, + { + "epoch": 0.81, + "learning_rate": 3.236660251606161e-05, + "loss": 0.7125, + "step": 11440 + }, + { + "epoch": 0.81, + "learning_rate": 3.2340034394872217e-05, + "loss": 0.7201, + "step": 11450 + }, + { + "epoch": 0.81, + "learning_rate": 3.231345719886502e-05, + "loss": 0.7293, + "step": 11460 + }, + { + "epoch": 0.81, + "learning_rate": 3.228687096089863e-05, + "loss": 0.7301, + "step": 11470 + }, + { + "epoch": 0.81, + "learning_rate": 3.226027571384281e-05, + "loss": 0.7094, + "step": 11480 + }, + { + "epoch": 0.81, + "learning_rate": 3.2233671490578474e-05, + "loss": 0.7153, + "step": 11490 + }, + { + "epoch": 0.81, + "learning_rate": 3.220705832399763e-05, + "loss": 0.7271, + "step": 11500 + }, + { + "epoch": 0.81, + "learning_rate": 3.218043624700335e-05, + "loss": 0.731, + "step": 11510 + }, + { + "epoch": 0.82, + "learning_rate": 3.215380529250971e-05, + "loss": 0.7227, + "step": 11520 + }, + { + "epoch": 0.82, + "learning_rate": 3.212716549344177e-05, + "loss": 0.7455, + "step": 11530 + }, + { + "epoch": 0.82, + "learning_rate": 3.210051688273552e-05, + "loss": 0.7609, + "step": 11540 + }, + { + "epoch": 0.82, + "learning_rate": 3.207385949333785e-05, + "loss": 0.7306, + "step": 11550 + }, + { + "epoch": 0.82, + "learning_rate": 3.204719335820651e-05, + "loss": 0.7132, + "step": 11560 + }, + { + "epoch": 0.82, + "learning_rate": 3.202051851031004e-05, + "loss": 0.735, + "step": 11570 + }, + { + "epoch": 0.82, + "learning_rate": 3.199383498262777e-05, + "loss": 0.7182, + "step": 11580 + }, + { + "epoch": 0.82, + "learning_rate": 3.196714280814976e-05, + "loss": 0.7235, + "step": 11590 + }, + { + "epoch": 0.82, + "learning_rate": 3.194044201987675e-05, + "loss": 0.7094, + "step": 11600 + }, + { + "epoch": 0.82, + "learning_rate": 3.191373265082015e-05, + "loss": 0.7078, + "step": 11610 + }, + { + "epoch": 0.82, + "learning_rate": 3.188701473400195e-05, + "loss": 0.7232, + "step": 11620 + }, + { + "epoch": 0.82, + "learning_rate": 3.1860288302454735e-05, + "loss": 0.7361, + "step": 11630 + }, + { + "epoch": 0.82, + "learning_rate": 3.18335533892216e-05, + "loss": 0.7037, + "step": 11640 + }, + { + "epoch": 0.82, + "learning_rate": 3.180681002735614e-05, + "loss": 0.7403, + "step": 11650 + }, + { + "epoch": 0.83, + "learning_rate": 3.178005824992237e-05, + "loss": 0.7395, + "step": 11660 + }, + { + "epoch": 0.83, + "learning_rate": 3.175329808999475e-05, + "loss": 0.738, + "step": 11670 + }, + { + "epoch": 0.83, + "learning_rate": 3.172652958065806e-05, + "loss": 0.7386, + "step": 11680 + }, + { + "epoch": 0.83, + "learning_rate": 3.169975275500743e-05, + "loss": 0.6953, + "step": 11690 + }, + { + "epoch": 0.83, + "learning_rate": 3.1672967646148285e-05, + "loss": 0.7369, + "step": 11700 + }, + { + "epoch": 0.83, + "learning_rate": 3.164617428719624e-05, + "loss": 0.737, + "step": 11710 + }, + { + "epoch": 0.83, + "learning_rate": 3.161937271127717e-05, + "loss": 0.7133, + "step": 11720 + }, + { + "epoch": 0.83, + "learning_rate": 3.159256295152705e-05, + "loss": 0.7289, + "step": 11730 + }, + { + "epoch": 0.83, + "learning_rate": 3.156574504109203e-05, + "loss": 0.7018, + "step": 11740 + }, + { + "epoch": 0.83, + "learning_rate": 3.1538919013128295e-05, + "loss": 0.7293, + "step": 11750 + }, + { + "epoch": 0.83, + "learning_rate": 3.151208490080209e-05, + "loss": 0.7382, + "step": 11760 + }, + { + "epoch": 0.83, + "learning_rate": 3.148524273728964e-05, + "loss": 0.7483, + "step": 11770 + }, + { + "epoch": 0.83, + "learning_rate": 3.145839255577714e-05, + "loss": 0.7483, + "step": 11780 + }, + { + "epoch": 0.83, + "learning_rate": 3.1431534389460665e-05, + "loss": 0.7278, + "step": 11790 + }, + { + "epoch": 0.84, + "learning_rate": 3.140466827154622e-05, + "loss": 0.7551, + "step": 11800 + }, + { + "epoch": 0.84, + "learning_rate": 3.137779423524958e-05, + "loss": 0.7652, + "step": 11810 + }, + { + "epoch": 0.84, + "learning_rate": 3.1350912313796336e-05, + "loss": 0.7296, + "step": 11820 + }, + { + "epoch": 0.84, + "learning_rate": 3.132402254042185e-05, + "loss": 0.722, + "step": 11830 + }, + { + "epoch": 0.84, + "learning_rate": 3.129712494837115e-05, + "loss": 0.6992, + "step": 11840 + }, + { + "epoch": 0.84, + "learning_rate": 3.127021957089896e-05, + "loss": 0.7204, + "step": 11850 + }, + { + "epoch": 0.84, + "learning_rate": 3.124330644126962e-05, + "loss": 0.7393, + "step": 11860 + }, + { + "epoch": 0.84, + "learning_rate": 3.1216385592757045e-05, + "loss": 0.7287, + "step": 11870 + }, + { + "epoch": 0.84, + "learning_rate": 3.118945705864471e-05, + "loss": 0.7548, + "step": 11880 + }, + { + "epoch": 0.84, + "learning_rate": 3.1162520872225584e-05, + "loss": 0.7513, + "step": 11890 + }, + { + "epoch": 0.84, + "learning_rate": 3.11355770668021e-05, + "loss": 0.724, + "step": 11900 + }, + { + "epoch": 0.84, + "learning_rate": 3.11086256756861e-05, + "loss": 0.7224, + "step": 11910 + }, + { + "epoch": 0.84, + "learning_rate": 3.1081666732198805e-05, + "loss": 0.7403, + "step": 11920 + }, + { + "epoch": 0.84, + "learning_rate": 3.1054700269670814e-05, + "loss": 0.7338, + "step": 11930 + }, + { + "epoch": 0.85, + "learning_rate": 3.102772632144195e-05, + "loss": 0.69, + "step": 11940 + }, + { + "epoch": 0.85, + "learning_rate": 3.100074492086136e-05, + "loss": 0.725, + "step": 11950 + }, + { + "epoch": 0.85, + "learning_rate": 3.0973756101287344e-05, + "loss": 0.7465, + "step": 11960 + }, + { + "epoch": 0.85, + "learning_rate": 3.094675989608744e-05, + "loss": 0.7249, + "step": 11970 + }, + { + "epoch": 0.85, + "learning_rate": 3.091975633863826e-05, + "loss": 0.7192, + "step": 11980 + }, + { + "epoch": 0.85, + "learning_rate": 3.089274546232554e-05, + "loss": 0.7273, + "step": 11990 + }, + { + "epoch": 0.85, + "learning_rate": 3.0865727300544026e-05, + "loss": 0.7629, + "step": 12000 + }, + { + "epoch": 0.85, + "learning_rate": 3.083870188669754e-05, + "loss": 0.731, + "step": 12010 + }, + { + "epoch": 0.85, + "learning_rate": 3.081166925419879e-05, + "loss": 0.7557, + "step": 12020 + }, + { + "epoch": 0.85, + "learning_rate": 3.078462943646949e-05, + "loss": 0.7376, + "step": 12030 + }, + { + "epoch": 0.85, + "learning_rate": 3.0757582466940135e-05, + "loss": 0.74, + "step": 12040 + }, + { + "epoch": 0.85, + "learning_rate": 3.073052837905018e-05, + "loss": 0.7296, + "step": 12050 + }, + { + "epoch": 0.85, + "learning_rate": 3.0703467206247784e-05, + "loss": 0.7117, + "step": 12060 + }, + { + "epoch": 0.85, + "learning_rate": 3.067639898198992e-05, + "loss": 0.7598, + "step": 12070 + }, + { + "epoch": 0.86, + "learning_rate": 3.064932373974225e-05, + "loss": 0.7447, + "step": 12080 + }, + { + "epoch": 0.86, + "learning_rate": 3.062224151297915e-05, + "loss": 0.7414, + "step": 12090 + }, + { + "epoch": 0.86, + "learning_rate": 3.059515233518358e-05, + "loss": 0.7199, + "step": 12100 + }, + { + "epoch": 0.86, + "learning_rate": 3.056805623984714e-05, + "loss": 0.7226, + "step": 12110 + }, + { + "epoch": 0.86, + "learning_rate": 3.0540953260469945e-05, + "loss": 0.7223, + "step": 12120 + }, + { + "epoch": 0.86, + "learning_rate": 3.0513843430560657e-05, + "loss": 0.7383, + "step": 12130 + }, + { + "epoch": 0.86, + "learning_rate": 3.0486726783636375e-05, + "loss": 0.741, + "step": 12140 + }, + { + "epoch": 0.86, + "learning_rate": 3.0459603353222643e-05, + "loss": 0.7246, + "step": 12150 + }, + { + "epoch": 0.86, + "learning_rate": 3.0432473172853404e-05, + "loss": 0.7158, + "step": 12160 + }, + { + "epoch": 0.86, + "learning_rate": 3.0405336276070918e-05, + "loss": 0.7089, + "step": 12170 + }, + { + "epoch": 0.86, + "learning_rate": 3.0378192696425768e-05, + "loss": 0.7204, + "step": 12180 + }, + { + "epoch": 0.86, + "learning_rate": 3.0351042467476782e-05, + "loss": 0.7198, + "step": 12190 + }, + { + "epoch": 0.86, + "learning_rate": 3.0323885622791042e-05, + "loss": 0.7504, + "step": 12200 + }, + { + "epoch": 0.86, + "learning_rate": 3.0296722195943767e-05, + "loss": 0.7084, + "step": 12210 + }, + { + "epoch": 0.86, + "learning_rate": 3.026955222051836e-05, + "loss": 0.7328, + "step": 12220 + }, + { + "epoch": 0.87, + "learning_rate": 3.0242375730106265e-05, + "loss": 0.7178, + "step": 12230 + }, + { + "epoch": 0.87, + "learning_rate": 3.0215192758307032e-05, + "loss": 0.7309, + "step": 12240 + }, + { + "epoch": 0.87, + "learning_rate": 3.0188003338728192e-05, + "loss": 0.7368, + "step": 12250 + }, + { + "epoch": 0.87, + "learning_rate": 3.0160807504985278e-05, + "loss": 0.6999, + "step": 12260 + }, + { + "epoch": 0.87, + "learning_rate": 3.0133605290701707e-05, + "loss": 0.7489, + "step": 12270 + }, + { + "epoch": 0.87, + "learning_rate": 3.0106396729508836e-05, + "loss": 0.7134, + "step": 12280 + }, + { + "epoch": 0.87, + "learning_rate": 3.0079181855045818e-05, + "loss": 0.7012, + "step": 12290 + }, + { + "epoch": 0.87, + "learning_rate": 3.0051960700959663e-05, + "loss": 0.7242, + "step": 12300 + }, + { + "epoch": 0.87, + "learning_rate": 3.002473330090511e-05, + "loss": 0.7115, + "step": 12310 + }, + { + "epoch": 0.87, + "learning_rate": 2.999749968854463e-05, + "loss": 0.7444, + "step": 12320 + }, + { + "epoch": 0.87, + "learning_rate": 2.9970259897548374e-05, + "loss": 0.7397, + "step": 12330 + }, + { + "epoch": 0.87, + "learning_rate": 2.9943013961594136e-05, + "loss": 0.7344, + "step": 12340 + }, + { + "epoch": 0.87, + "learning_rate": 2.9915761914367302e-05, + "loss": 0.7216, + "step": 12350 + }, + { + "epoch": 0.87, + "learning_rate": 2.9888503789560808e-05, + "loss": 0.7298, + "step": 12360 + }, + { + "epoch": 0.88, + "learning_rate": 2.986123962087512e-05, + "loss": 0.7572, + "step": 12370 + }, + { + "epoch": 0.88, + "learning_rate": 2.9833969442018168e-05, + "loss": 0.7116, + "step": 12380 + }, + { + "epoch": 0.88, + "learning_rate": 2.9806693286705312e-05, + "loss": 0.7127, + "step": 12390 + }, + { + "epoch": 0.88, + "learning_rate": 2.977941118865929e-05, + "loss": 0.7188, + "step": 12400 + }, + { + "epoch": 0.88, + "learning_rate": 2.9752123181610216e-05, + "loss": 0.7249, + "step": 12410 + }, + { + "epoch": 0.88, + "learning_rate": 2.9724829299295477e-05, + "loss": 0.722, + "step": 12420 + }, + { + "epoch": 0.88, + "learning_rate": 2.9697529575459755e-05, + "loss": 0.7404, + "step": 12430 + }, + { + "epoch": 0.88, + "learning_rate": 2.9670224043854916e-05, + "loss": 0.719, + "step": 12440 + }, + { + "epoch": 0.88, + "learning_rate": 2.9642912738240052e-05, + "loss": 0.7442, + "step": 12450 + }, + { + "epoch": 0.88, + "learning_rate": 2.9615595692381348e-05, + "loss": 0.7398, + "step": 12460 + }, + { + "epoch": 0.88, + "learning_rate": 2.958827294005213e-05, + "loss": 0.7281, + "step": 12470 + }, + { + "epoch": 0.88, + "learning_rate": 2.956094451503274e-05, + "loss": 0.721, + "step": 12480 + }, + { + "epoch": 0.88, + "learning_rate": 2.9533610451110566e-05, + "loss": 0.7184, + "step": 12490 + }, + { + "epoch": 0.88, + "learning_rate": 2.9509005000249595e-05, + "loss": 0.719, + "step": 12500 + }, + { + "epoch": 0.89, + "learning_rate": 2.948166031552126e-05, + "loss": 0.7482, + "step": 12510 + }, + { + "epoch": 0.89, + "learning_rate": 2.9454310089912785e-05, + "loss": 0.7418, + "step": 12520 + }, + { + "epoch": 0.89, + "learning_rate": 2.9426954357238502e-05, + "loss": 0.7526, + "step": 12530 + }, + { + "epoch": 0.89, + "learning_rate": 2.939959315131954e-05, + "loss": 0.725, + "step": 12540 + }, + { + "epoch": 0.89, + "learning_rate": 2.9372226505983802e-05, + "loss": 0.7073, + "step": 12550 + }, + { + "epoch": 0.89, + "learning_rate": 2.934485445506591e-05, + "loss": 0.7359, + "step": 12560 + }, + { + "epoch": 0.89, + "learning_rate": 2.9317477032407188e-05, + "loss": 0.7159, + "step": 12570 + }, + { + "epoch": 0.89, + "learning_rate": 2.9290094271855573e-05, + "loss": 0.7015, + "step": 12580 + }, + { + "epoch": 0.89, + "learning_rate": 2.9262706207265618e-05, + "loss": 0.6919, + "step": 12590 + }, + { + "epoch": 0.89, + "learning_rate": 2.923531287249843e-05, + "loss": 0.7245, + "step": 12600 + }, + { + "epoch": 0.89, + "learning_rate": 2.9207914301421635e-05, + "loss": 0.7212, + "step": 12610 + }, + { + "epoch": 0.89, + "learning_rate": 2.9180510527909334e-05, + "loss": 0.7236, + "step": 12620 + }, + { + "epoch": 0.89, + "learning_rate": 2.915310158584205e-05, + "loss": 0.7417, + "step": 12630 + }, + { + "epoch": 0.89, + "learning_rate": 2.9125687509106702e-05, + "loss": 0.7139, + "step": 12640 + }, + { + "epoch": 0.9, + "learning_rate": 2.9098268331596568e-05, + "loss": 0.7098, + "step": 12650 + }, + { + "epoch": 0.9, + "learning_rate": 2.9070844087211207e-05, + "loss": 0.7271, + "step": 12660 + }, + { + "epoch": 0.9, + "learning_rate": 2.9043414809856463e-05, + "loss": 0.7086, + "step": 12670 + }, + { + "epoch": 0.9, + "learning_rate": 2.901598053344441e-05, + "loss": 0.7483, + "step": 12680 + }, + { + "epoch": 0.9, + "learning_rate": 2.8988541291893267e-05, + "loss": 0.7425, + "step": 12690 + }, + { + "epoch": 0.9, + "learning_rate": 2.896109711912744e-05, + "loss": 0.7201, + "step": 12700 + }, + { + "epoch": 0.9, + "learning_rate": 2.893364804907738e-05, + "loss": 0.7443, + "step": 12710 + }, + { + "epoch": 0.9, + "learning_rate": 2.890619411567964e-05, + "loss": 0.7383, + "step": 12720 + }, + { + "epoch": 0.9, + "learning_rate": 2.8878735352876746e-05, + "loss": 0.7197, + "step": 12730 + }, + { + "epoch": 0.9, + "learning_rate": 2.885127179461723e-05, + "loss": 0.7102, + "step": 12740 + }, + { + "epoch": 0.9, + "learning_rate": 2.882380347485552e-05, + "loss": 0.7379, + "step": 12750 + }, + { + "epoch": 0.9, + "learning_rate": 2.8796330427551958e-05, + "loss": 0.736, + "step": 12760 + }, + { + "epoch": 0.9, + "learning_rate": 2.876885268667272e-05, + "loss": 0.7209, + "step": 12770 + }, + { + "epoch": 0.9, + "learning_rate": 2.8741370286189783e-05, + "loss": 0.7219, + "step": 12780 + }, + { + "epoch": 0.91, + "learning_rate": 2.871388326008088e-05, + "loss": 0.7205, + "step": 12790 + }, + { + "epoch": 0.91, + "learning_rate": 2.868639164232948e-05, + "loss": 0.7213, + "step": 12800 + }, + { + "epoch": 0.91, + "learning_rate": 2.8658895466924707e-05, + "loss": 0.7205, + "step": 12810 + }, + { + "epoch": 0.91, + "learning_rate": 2.8631394767861342e-05, + "loss": 0.7313, + "step": 12820 + }, + { + "epoch": 0.91, + "learning_rate": 2.8603889579139742e-05, + "loss": 0.7155, + "step": 12830 + }, + { + "epoch": 0.91, + "learning_rate": 2.8576379934765824e-05, + "loss": 0.7366, + "step": 12840 + }, + { + "epoch": 0.91, + "learning_rate": 2.8548865868751002e-05, + "loss": 0.7453, + "step": 12850 + }, + { + "epoch": 0.91, + "learning_rate": 2.8521347415112175e-05, + "loss": 0.7412, + "step": 12860 + }, + { + "epoch": 0.91, + "learning_rate": 2.849382460787165e-05, + "loss": 0.7226, + "step": 12870 + }, + { + "epoch": 0.91, + "learning_rate": 2.846629748105713e-05, + "loss": 0.7102, + "step": 12880 + }, + { + "epoch": 0.91, + "learning_rate": 2.8438766068701643e-05, + "loss": 0.7158, + "step": 12890 + }, + { + "epoch": 0.91, + "learning_rate": 2.841123040484353e-05, + "loss": 0.7229, + "step": 12900 + }, + { + "epoch": 0.91, + "learning_rate": 2.8383690523526386e-05, + "loss": 0.7041, + "step": 12910 + }, + { + "epoch": 0.91, + "learning_rate": 2.835614645879901e-05, + "loss": 0.7187, + "step": 12920 + }, + { + "epoch": 0.92, + "learning_rate": 2.8328598244715377e-05, + "loss": 0.7469, + "step": 12930 + }, + { + "epoch": 0.92, + "learning_rate": 2.8301045915334606e-05, + "loss": 0.7331, + "step": 12940 + }, + { + "epoch": 0.92, + "learning_rate": 2.8273489504720885e-05, + "loss": 0.7355, + "step": 12950 + }, + { + "epoch": 0.92, + "learning_rate": 2.8245929046943453e-05, + "loss": 0.7355, + "step": 12960 + }, + { + "epoch": 0.92, + "learning_rate": 2.8218364576076566e-05, + "loss": 0.7246, + "step": 12970 + }, + { + "epoch": 0.92, + "learning_rate": 2.8190796126199415e-05, + "loss": 0.7191, + "step": 12980 + }, + { + "epoch": 0.92, + "learning_rate": 2.8163223731396143e-05, + "loss": 0.719, + "step": 12990 + }, + { + "epoch": 0.92, + "learning_rate": 2.813564742575575e-05, + "loss": 0.7296, + "step": 13000 + }, + { + "epoch": 0.92, + "learning_rate": 2.8108067243372067e-05, + "loss": 0.7325, + "step": 13010 + }, + { + "epoch": 0.92, + "learning_rate": 2.808048321834373e-05, + "loss": 0.7346, + "step": 13020 + }, + { + "epoch": 0.92, + "learning_rate": 2.8052895384774125e-05, + "loss": 0.7191, + "step": 13030 + }, + { + "epoch": 0.92, + "learning_rate": 2.8025303776771333e-05, + "loss": 0.7408, + "step": 13040 + }, + { + "epoch": 0.92, + "learning_rate": 2.7997708428448126e-05, + "loss": 0.7196, + "step": 13050 + }, + { + "epoch": 0.92, + "learning_rate": 2.7970109373921878e-05, + "loss": 0.7324, + "step": 13060 + }, + { + "epoch": 0.93, + "learning_rate": 2.7942506647314547e-05, + "loss": 0.7488, + "step": 13070 + }, + { + "epoch": 0.93, + "learning_rate": 2.7914900282752648e-05, + "loss": 0.717, + "step": 13080 + }, + { + "epoch": 0.93, + "learning_rate": 2.788729031436718e-05, + "loss": 0.7391, + "step": 13090 + }, + { + "epoch": 0.93, + "learning_rate": 2.78596767762936e-05, + "loss": 0.735, + "step": 13100 + }, + { + "epoch": 0.93, + "learning_rate": 2.7832059702671776e-05, + "loss": 0.7312, + "step": 13110 + }, + { + "epoch": 0.93, + "learning_rate": 2.7804439127645955e-05, + "loss": 0.7198, + "step": 13120 + }, + { + "epoch": 0.93, + "learning_rate": 2.7776815085364705e-05, + "loss": 0.7061, + "step": 13130 + }, + { + "epoch": 0.93, + "learning_rate": 2.7749187609980887e-05, + "loss": 0.7045, + "step": 13140 + }, + { + "epoch": 0.93, + "learning_rate": 2.77215567356516e-05, + "loss": 0.7084, + "step": 13150 + }, + { + "epoch": 0.93, + "learning_rate": 2.7693922496538143e-05, + "loss": 0.7186, + "step": 13160 + }, + { + "epoch": 0.93, + "learning_rate": 2.766628492680599e-05, + "loss": 0.7349, + "step": 13170 + }, + { + "epoch": 0.93, + "learning_rate": 2.7638644060624723e-05, + "loss": 0.7177, + "step": 13180 + }, + { + "epoch": 0.93, + "learning_rate": 2.7610999932167993e-05, + "loss": 0.722, + "step": 13190 + }, + { + "epoch": 0.93, + "learning_rate": 2.7583352575613497e-05, + "loss": 0.716, + "step": 13200 + }, + { + "epoch": 0.94, + "learning_rate": 2.7555702025142916e-05, + "loss": 0.7362, + "step": 13210 + }, + { + "epoch": 0.94, + "learning_rate": 2.7528048314941872e-05, + "loss": 0.7387, + "step": 13220 + }, + { + "epoch": 0.94, + "learning_rate": 2.750039147919993e-05, + "loss": 0.7187, + "step": 13230 + }, + { + "epoch": 0.94, + "learning_rate": 2.7472731552110448e-05, + "loss": 0.7194, + "step": 13240 + }, + { + "epoch": 0.94, + "learning_rate": 2.744506856787069e-05, + "loss": 0.7414, + "step": 13250 + }, + { + "epoch": 0.94, + "learning_rate": 2.7417402560681636e-05, + "loss": 0.7284, + "step": 13260 + }, + { + "epoch": 0.94, + "learning_rate": 2.7389733564748043e-05, + "loss": 0.7415, + "step": 13270 + }, + { + "epoch": 0.94, + "learning_rate": 2.7362061614278333e-05, + "loss": 0.7371, + "step": 13280 + }, + { + "epoch": 0.94, + "learning_rate": 2.7334386743484608e-05, + "loss": 0.7564, + "step": 13290 + }, + { + "epoch": 0.94, + "learning_rate": 2.7306708986582553e-05, + "loss": 0.7017, + "step": 13300 + }, + { + "epoch": 0.94, + "learning_rate": 2.7279028377791444e-05, + "loss": 0.7452, + "step": 13310 + }, + { + "epoch": 0.94, + "learning_rate": 2.725134495133407e-05, + "loss": 0.74, + "step": 13320 + }, + { + "epoch": 0.94, + "learning_rate": 2.7223658741436714e-05, + "loss": 0.741, + "step": 13330 + }, + { + "epoch": 0.94, + "learning_rate": 2.719596978232909e-05, + "loss": 0.7338, + "step": 13340 + }, + { + "epoch": 0.94, + "learning_rate": 2.7168278108244318e-05, + "loss": 0.7036, + "step": 13350 + }, + { + "epoch": 0.95, + "learning_rate": 2.714058375341887e-05, + "loss": 0.709, + "step": 13360 + }, + { + "epoch": 0.95, + "learning_rate": 2.7112886752092535e-05, + "loss": 0.7165, + "step": 13370 + }, + { + "epoch": 0.95, + "learning_rate": 2.7085187138508373e-05, + "loss": 0.6954, + "step": 13380 + }, + { + "epoch": 0.95, + "learning_rate": 2.7057484946912676e-05, + "loss": 0.7222, + "step": 13390 + }, + { + "epoch": 0.95, + "learning_rate": 2.7029780211554917e-05, + "loss": 0.7261, + "step": 13400 + }, + { + "epoch": 0.95, + "learning_rate": 2.700207296668772e-05, + "loss": 0.7591, + "step": 13410 + }, + { + "epoch": 0.95, + "learning_rate": 2.6974363246566814e-05, + "loss": 0.7099, + "step": 13420 + }, + { + "epoch": 0.95, + "learning_rate": 2.694665108545098e-05, + "loss": 0.7162, + "step": 13430 + }, + { + "epoch": 0.95, + "learning_rate": 2.6918936517602023e-05, + "loss": 0.7088, + "step": 13440 + }, + { + "epoch": 0.95, + "learning_rate": 2.689121957728471e-05, + "loss": 0.7684, + "step": 13450 + }, + { + "epoch": 0.95, + "learning_rate": 2.686350029876678e-05, + "loss": 0.7023, + "step": 13460 + }, + { + "epoch": 0.95, + "learning_rate": 2.6835778716318804e-05, + "loss": 0.7079, + "step": 13470 + }, + { + "epoch": 0.95, + "learning_rate": 2.680805486421426e-05, + "loss": 0.7105, + "step": 13480 + }, + { + "epoch": 0.95, + "learning_rate": 2.678032877672938e-05, + "loss": 0.7583, + "step": 13490 + }, + { + "epoch": 0.96, + "learning_rate": 2.6752600488143216e-05, + "loss": 0.7468, + "step": 13500 + }, + { + "epoch": 0.96, + "learning_rate": 2.6724870032737475e-05, + "loss": 0.7491, + "step": 13510 + }, + { + "epoch": 0.96, + "learning_rate": 2.6697137444796604e-05, + "loss": 0.716, + "step": 13520 + }, + { + "epoch": 0.96, + "learning_rate": 2.666940275860765e-05, + "loss": 0.7139, + "step": 13530 + }, + { + "epoch": 0.96, + "learning_rate": 2.6641666008460263e-05, + "loss": 0.7253, + "step": 13540 + }, + { + "epoch": 0.96, + "learning_rate": 2.661392722864665e-05, + "loss": 0.7396, + "step": 13550 + }, + { + "epoch": 0.96, + "learning_rate": 2.6586186453461533e-05, + "loss": 0.7135, + "step": 13560 + }, + { + "epoch": 0.96, + "learning_rate": 2.6558443717202076e-05, + "loss": 0.7286, + "step": 13570 + }, + { + "epoch": 0.96, + "learning_rate": 2.6530699054167896e-05, + "loss": 0.7327, + "step": 13580 + }, + { + "epoch": 0.96, + "learning_rate": 2.650295249866097e-05, + "loss": 0.7073, + "step": 13590 + }, + { + "epoch": 0.96, + "learning_rate": 2.647520408498563e-05, + "loss": 0.7145, + "step": 13600 + }, + { + "epoch": 0.96, + "learning_rate": 2.64474538474485e-05, + "loss": 0.7094, + "step": 13610 + }, + { + "epoch": 0.96, + "learning_rate": 2.6419701820358457e-05, + "loss": 0.7216, + "step": 13620 + }, + { + "epoch": 0.96, + "learning_rate": 2.6391948038026587e-05, + "loss": 0.7121, + "step": 13630 + }, + { + "epoch": 0.97, + "learning_rate": 2.6364192534766163e-05, + "loss": 0.7416, + "step": 13640 + }, + { + "epoch": 0.97, + "learning_rate": 2.633643534489256e-05, + "loss": 0.7127, + "step": 13650 + }, + { + "epoch": 0.97, + "learning_rate": 2.630867650272327e-05, + "loss": 0.7175, + "step": 13660 + }, + { + "epoch": 0.97, + "learning_rate": 2.628091604257779e-05, + "loss": 0.7149, + "step": 13670 + }, + { + "epoch": 0.97, + "learning_rate": 2.6253153998777646e-05, + "loss": 0.7207, + "step": 13680 + }, + { + "epoch": 0.97, + "learning_rate": 2.622539040564633e-05, + "loss": 0.7319, + "step": 13690 + }, + { + "epoch": 0.97, + "learning_rate": 2.61976252975092e-05, + "loss": 0.7423, + "step": 13700 + }, + { + "epoch": 0.97, + "learning_rate": 2.6169858708693544e-05, + "loss": 0.7501, + "step": 13710 + }, + { + "epoch": 0.97, + "learning_rate": 2.614209067352844e-05, + "loss": 0.7502, + "step": 13720 + }, + { + "epoch": 0.97, + "learning_rate": 2.6114321226344797e-05, + "loss": 0.7136, + "step": 13730 + }, + { + "epoch": 0.97, + "learning_rate": 2.608655040147521e-05, + "loss": 0.7071, + "step": 13740 + }, + { + "epoch": 0.97, + "learning_rate": 2.6058778233254044e-05, + "loss": 0.7285, + "step": 13750 + }, + { + "epoch": 0.97, + "learning_rate": 2.6031004756017258e-05, + "loss": 0.7562, + "step": 13760 + }, + { + "epoch": 0.97, + "learning_rate": 2.600323000410249e-05, + "loss": 0.7256, + "step": 13770 + }, + { + "epoch": 0.98, + "learning_rate": 2.597545401184891e-05, + "loss": 0.72, + "step": 13780 + }, + { + "epoch": 0.98, + "learning_rate": 2.5947676813597253e-05, + "loss": 0.7321, + "step": 13790 + }, + { + "epoch": 0.98, + "learning_rate": 2.5919898443689712e-05, + "loss": 0.7412, + "step": 13800 + }, + { + "epoch": 0.98, + "learning_rate": 2.5892118936469965e-05, + "loss": 0.7299, + "step": 13810 + }, + { + "epoch": 0.98, + "learning_rate": 2.5864338326283068e-05, + "loss": 0.7262, + "step": 13820 + }, + { + "epoch": 0.98, + "learning_rate": 2.5836556647475453e-05, + "loss": 0.7041, + "step": 13830 + }, + { + "epoch": 0.98, + "learning_rate": 2.580877393439487e-05, + "loss": 0.7359, + "step": 13840 + }, + { + "epoch": 0.98, + "learning_rate": 2.5780990221390355e-05, + "loss": 0.7501, + "step": 13850 + }, + { + "epoch": 0.98, + "learning_rate": 2.5753205542812163e-05, + "loss": 0.7227, + "step": 13860 + }, + { + "epoch": 0.98, + "learning_rate": 2.5725419933011763e-05, + "loss": 0.7348, + "step": 13870 + }, + { + "epoch": 0.98, + "learning_rate": 2.5697633426341762e-05, + "loss": 0.7136, + "step": 13880 + }, + { + "epoch": 0.98, + "learning_rate": 2.5669846057155878e-05, + "loss": 0.7142, + "step": 13890 + }, + { + "epoch": 0.98, + "learning_rate": 2.56420578598089e-05, + "loss": 0.7427, + "step": 13900 + }, + { + "epoch": 0.98, + "learning_rate": 2.5614268868656633e-05, + "loss": 0.7268, + "step": 13910 + }, + { + "epoch": 0.99, + "learning_rate": 2.5586479118055877e-05, + "loss": 0.7031, + "step": 13920 + }, + { + "epoch": 0.99, + "learning_rate": 2.5558688642364353e-05, + "loss": 0.7564, + "step": 13930 + }, + { + "epoch": 0.99, + "learning_rate": 2.5530897475940706e-05, + "loss": 0.7245, + "step": 13940 + }, + { + "epoch": 0.99, + "learning_rate": 2.5503105653144392e-05, + "loss": 0.7307, + "step": 13950 + }, + { + "epoch": 0.99, + "learning_rate": 2.5475313208335728e-05, + "loss": 0.7294, + "step": 13960 + }, + { + "epoch": 0.99, + "learning_rate": 2.544752017587575e-05, + "loss": 0.7223, + "step": 13970 + }, + { + "epoch": 0.99, + "learning_rate": 2.541972659012627e-05, + "loss": 0.7094, + "step": 13980 + }, + { + "epoch": 0.99, + "learning_rate": 2.5391932485449738e-05, + "loss": 0.7137, + "step": 13990 + }, + { + "epoch": 0.99, + "learning_rate": 2.536413789620929e-05, + "loss": 0.7361, + "step": 14000 + }, + { + "epoch": 0.99, + "learning_rate": 2.533634285676862e-05, + "loss": 0.6973, + "step": 14010 + }, + { + "epoch": 0.99, + "learning_rate": 2.530854740149201e-05, + "loss": 0.7166, + "step": 14020 + }, + { + "epoch": 0.99, + "learning_rate": 2.528075156474423e-05, + "loss": 0.7395, + "step": 14030 + }, + { + "epoch": 0.99, + "learning_rate": 2.5252955380890554e-05, + "loss": 0.7196, + "step": 14040 + }, + { + "epoch": 0.99, + "learning_rate": 2.522515888429664e-05, + "loss": 0.6977, + "step": 14050 + }, + { + "epoch": 1.0, + "learning_rate": 2.5197362109328592e-05, + "loss": 0.7156, + "step": 14060 + }, + { + "epoch": 1.0, + "learning_rate": 2.5169565090352792e-05, + "loss": 0.7036, + "step": 14070 + }, + { + "epoch": 1.0, + "learning_rate": 2.5141767861735976e-05, + "loss": 0.7311, + "step": 14080 + }, + { + "epoch": 1.0, + "learning_rate": 2.511397045784512e-05, + "loss": 0.7456, + "step": 14090 + }, + { + "epoch": 1.0, + "learning_rate": 2.5086172913047406e-05, + "loss": 0.7164, + "step": 14100 + }, + { + "epoch": 1.0, + "learning_rate": 2.505837526171021e-05, + "loss": 0.7436, + "step": 14110 + }, + { + "epoch": 1.0, + "learning_rate": 2.503057753820103e-05, + "loss": 0.6857, + "step": 14120 + }, + { + "epoch": 1.0, + "learning_rate": 2.500277977688745e-05, + "loss": 0.7089, + "step": 14130 + }, + { + "epoch": 1.0, + "learning_rate": 2.4974982012137106e-05, + "loss": 0.7336, + "step": 14140 + }, + { + "epoch": 1.0, + "learning_rate": 2.494718427831763e-05, + "loss": 0.6962, + "step": 14150 + }, + { + "epoch": 1.0, + "learning_rate": 2.491938660979664e-05, + "loss": 0.7205, + "step": 14160 + }, + { + "epoch": 1.0, + "learning_rate": 2.4891589040941636e-05, + "loss": 0.7325, + "step": 14170 + }, + { + "epoch": 1.0, + "learning_rate": 2.4863791606120022e-05, + "loss": 0.7169, + "step": 14180 + }, + { + "epoch": 1.0, + "learning_rate": 2.483599433969903e-05, + "loss": 0.7421, + "step": 14190 + }, + { + "epoch": 1.01, + "learning_rate": 2.4808197276045692e-05, + "loss": 0.7531, + "step": 14200 + }, + { + "epoch": 1.01, + "learning_rate": 2.4780400449526762e-05, + "loss": 0.7091, + "step": 14210 + }, + { + "epoch": 1.01, + "learning_rate": 2.4752603894508726e-05, + "loss": 0.7389, + "step": 14220 + }, + { + "epoch": 1.01, + "learning_rate": 2.472480764535773e-05, + "loss": 0.6991, + "step": 14230 + }, + { + "epoch": 1.01, + "learning_rate": 2.4697011736439546e-05, + "loss": 0.7178, + "step": 14240 + }, + { + "epoch": 1.01, + "learning_rate": 2.46692162021195e-05, + "loss": 0.7017, + "step": 14250 + }, + { + "epoch": 1.01, + "learning_rate": 2.464142107676248e-05, + "loss": 0.7451, + "step": 14260 + }, + { + "epoch": 1.01, + "learning_rate": 2.461362639473287e-05, + "loss": 0.7172, + "step": 14270 + }, + { + "epoch": 1.01, + "learning_rate": 2.4585832190394496e-05, + "loss": 0.7445, + "step": 14280 + }, + { + "epoch": 1.01, + "learning_rate": 2.4558038498110584e-05, + "loss": 0.6883, + "step": 14290 + }, + { + "epoch": 1.01, + "learning_rate": 2.4530245352243738e-05, + "loss": 0.6903, + "step": 14300 + }, + { + "epoch": 1.01, + "learning_rate": 2.4502452787155897e-05, + "loss": 0.714, + "step": 14310 + }, + { + "epoch": 1.01, + "learning_rate": 2.447466083720827e-05, + "loss": 0.7174, + "step": 14320 + }, + { + "epoch": 1.01, + "learning_rate": 2.4446869536761296e-05, + "loss": 0.7164, + "step": 14330 + }, + { + "epoch": 1.02, + "learning_rate": 2.4419078920174633e-05, + "loss": 0.746, + "step": 14340 + }, + { + "epoch": 1.02, + "learning_rate": 2.4391289021807078e-05, + "loss": 0.7265, + "step": 14350 + }, + { + "epoch": 1.02, + "learning_rate": 2.436349987601655e-05, + "loss": 0.7462, + "step": 14360 + }, + { + "epoch": 1.02, + "learning_rate": 2.4335711517160013e-05, + "loss": 0.7269, + "step": 14370 + }, + { + "epoch": 1.02, + "learning_rate": 2.4307923979593493e-05, + "loss": 0.7325, + "step": 14380 + }, + { + "epoch": 1.02, + "learning_rate": 2.4280137297671975e-05, + "loss": 0.6914, + "step": 14390 + }, + { + "epoch": 1.02, + "learning_rate": 2.425235150574941e-05, + "loss": 0.7243, + "step": 14400 + }, + { + "epoch": 1.02, + "learning_rate": 2.422456663817863e-05, + "loss": 0.7139, + "step": 14410 + }, + { + "epoch": 1.02, + "learning_rate": 2.4196782729311315e-05, + "loss": 0.7298, + "step": 14420 + }, + { + "epoch": 1.02, + "learning_rate": 2.4168999813497977e-05, + "loss": 0.712, + "step": 14430 + }, + { + "epoch": 1.02, + "learning_rate": 2.414121792508791e-05, + "loss": 0.7355, + "step": 14440 + }, + { + "epoch": 1.02, + "learning_rate": 2.4113437098429118e-05, + "loss": 0.6978, + "step": 14450 + }, + { + "epoch": 1.02, + "learning_rate": 2.408565736786829e-05, + "loss": 0.6907, + "step": 14460 + }, + { + "epoch": 1.02, + "learning_rate": 2.4057878767750767e-05, + "loss": 0.7259, + "step": 14470 + }, + { + "epoch": 1.02, + "learning_rate": 2.4030101332420508e-05, + "loss": 0.7158, + "step": 14480 + }, + { + "epoch": 1.03, + "learning_rate": 2.4002325096220013e-05, + "loss": 0.7329, + "step": 14490 + }, + { + "epoch": 1.03, + "learning_rate": 2.3974550093490295e-05, + "loss": 0.7507, + "step": 14500 + }, + { + "epoch": 1.03, + "learning_rate": 2.3946776358570853e-05, + "loss": 0.7169, + "step": 14510 + }, + { + "epoch": 1.03, + "learning_rate": 2.3919003925799623e-05, + "loss": 0.7391, + "step": 14520 + }, + { + "epoch": 1.03, + "learning_rate": 2.389123282951293e-05, + "loss": 0.729, + "step": 14530 + }, + { + "epoch": 1.03, + "learning_rate": 2.3863463104045422e-05, + "loss": 0.7366, + "step": 14540 + }, + { + "epoch": 1.03, + "learning_rate": 2.383569478373009e-05, + "loss": 0.72, + "step": 14550 + }, + { + "epoch": 1.03, + "learning_rate": 2.380792790289816e-05, + "loss": 0.7108, + "step": 14560 + }, + { + "epoch": 1.03, + "learning_rate": 2.3780162495879094e-05, + "loss": 0.7269, + "step": 14570 + }, + { + "epoch": 1.03, + "learning_rate": 2.3752398597000508e-05, + "loss": 0.7303, + "step": 14580 + }, + { + "epoch": 1.03, + "learning_rate": 2.3724636240588194e-05, + "loss": 0.7183, + "step": 14590 + }, + { + "epoch": 1.03, + "learning_rate": 2.369965146699447e-05, + "loss": 0.6879, + "step": 14600 + }, + { + "epoch": 1.03, + "learning_rate": 2.367189213582869e-05, + "loss": 0.7162, + "step": 14610 + }, + { + "epoch": 1.03, + "learning_rate": 2.3644134446662946e-05, + "loss": 0.7065, + "step": 14620 + }, + { + "epoch": 1.04, + "learning_rate": 2.361637843381536e-05, + "loss": 0.7215, + "step": 14630 + }, + { + "epoch": 1.04, + "learning_rate": 2.358862413160193e-05, + "loss": 0.6991, + "step": 14640 + }, + { + "epoch": 1.04, + "learning_rate": 2.3560871574336586e-05, + "loss": 0.7201, + "step": 14650 + }, + { + "epoch": 1.04, + "learning_rate": 2.353312079633104e-05, + "loss": 0.7008, + "step": 14660 + }, + { + "epoch": 1.04, + "learning_rate": 2.3505371831894863e-05, + "loss": 0.7433, + "step": 14670 + }, + { + "epoch": 1.04, + "learning_rate": 2.3477624715335346e-05, + "loss": 0.7083, + "step": 14680 + }, + { + "epoch": 1.04, + "learning_rate": 2.3449879480957525e-05, + "loss": 0.7103, + "step": 14690 + }, + { + "epoch": 1.04, + "learning_rate": 2.3422136163064094e-05, + "loss": 0.7264, + "step": 14700 + }, + { + "epoch": 1.04, + "learning_rate": 2.3394394795955354e-05, + "loss": 0.7147, + "step": 14710 + }, + { + "epoch": 1.04, + "learning_rate": 2.3366655413929228e-05, + "loss": 0.7317, + "step": 14720 + }, + { + "epoch": 1.04, + "learning_rate": 2.333891805128118e-05, + "loss": 0.7039, + "step": 14730 + }, + { + "epoch": 1.04, + "learning_rate": 2.3311182742304173e-05, + "loss": 0.7199, + "step": 14740 + }, + { + "epoch": 1.04, + "learning_rate": 2.328344952128861e-05, + "loss": 0.7012, + "step": 14750 + }, + { + "epoch": 1.04, + "learning_rate": 2.325571842252235e-05, + "loss": 0.7678, + "step": 14760 + }, + { + "epoch": 1.05, + "learning_rate": 2.32279894802906e-05, + "loss": 0.7147, + "step": 14770 + }, + { + "epoch": 1.05, + "learning_rate": 2.3200262728875925e-05, + "loss": 0.7143, + "step": 14780 + }, + { + "epoch": 1.05, + "learning_rate": 2.3172538202558137e-05, + "loss": 0.6973, + "step": 14790 + }, + { + "epoch": 1.05, + "learning_rate": 2.3144815935614352e-05, + "loss": 0.7037, + "step": 14800 + }, + { + "epoch": 1.05, + "learning_rate": 2.3117095962318864e-05, + "loss": 0.6976, + "step": 14810 + }, + { + "epoch": 1.05, + "learning_rate": 2.308937831694313e-05, + "loss": 0.7638, + "step": 14820 + }, + { + "epoch": 1.05, + "learning_rate": 2.3061663033755725e-05, + "loss": 0.7369, + "step": 14830 + }, + { + "epoch": 1.05, + "learning_rate": 2.3033950147022328e-05, + "loss": 0.7297, + "step": 14840 + }, + { + "epoch": 1.05, + "learning_rate": 2.3006239691005626e-05, + "loss": 0.7307, + "step": 14850 + }, + { + "epoch": 1.05, + "learning_rate": 2.297853169996534e-05, + "loss": 0.7289, + "step": 14860 + }, + { + "epoch": 1.05, + "learning_rate": 2.2950826208158077e-05, + "loss": 0.7271, + "step": 14870 + }, + { + "epoch": 1.05, + "learning_rate": 2.2923123249837423e-05, + "loss": 0.7116, + "step": 14880 + }, + { + "epoch": 1.05, + "learning_rate": 2.2895422859253787e-05, + "loss": 0.7267, + "step": 14890 + }, + { + "epoch": 1.05, + "learning_rate": 2.2867725070654443e-05, + "loss": 0.7217, + "step": 14900 + }, + { + "epoch": 1.06, + "learning_rate": 2.2840029918283398e-05, + "loss": 0.7272, + "step": 14910 + }, + { + "epoch": 1.06, + "learning_rate": 2.2812337436381443e-05, + "loss": 0.7261, + "step": 14920 + }, + { + "epoch": 1.06, + "learning_rate": 2.2784647659186038e-05, + "loss": 0.7273, + "step": 14930 + }, + { + "epoch": 1.06, + "learning_rate": 2.2756960620931332e-05, + "loss": 0.7185, + "step": 14940 + }, + { + "epoch": 1.06, + "learning_rate": 2.272927635584805e-05, + "loss": 0.7266, + "step": 14950 + }, + { + "epoch": 1.06, + "learning_rate": 2.2701594898163505e-05, + "loss": 0.7296, + "step": 14960 + }, + { + "epoch": 1.06, + "learning_rate": 2.2673916282101545e-05, + "loss": 0.7148, + "step": 14970 + }, + { + "epoch": 1.06, + "learning_rate": 2.2646240541882507e-05, + "loss": 0.7427, + "step": 14980 + }, + { + "epoch": 1.06, + "learning_rate": 2.2618567711723165e-05, + "loss": 0.7107, + "step": 14990 + }, + { + "epoch": 1.06, + "learning_rate": 2.2590897825836675e-05, + "loss": 0.7066, + "step": 15000 + }, + { + "epoch": 1.06, + "learning_rate": 2.2563230918432597e-05, + "loss": 0.6984, + "step": 15010 + }, + { + "epoch": 1.06, + "learning_rate": 2.253556702371677e-05, + "loss": 0.7009, + "step": 15020 + }, + { + "epoch": 1.06, + "learning_rate": 2.250790617589134e-05, + "loss": 0.7006, + "step": 15030 + }, + { + "epoch": 1.06, + "learning_rate": 2.2480248409154644e-05, + "loss": 0.7112, + "step": 15040 + }, + { + "epoch": 1.07, + "learning_rate": 2.2452593757701254e-05, + "loss": 0.7061, + "step": 15050 + }, + { + "epoch": 1.07, + "learning_rate": 2.2424942255721863e-05, + "loss": 0.6887, + "step": 15060 + }, + { + "epoch": 1.07, + "learning_rate": 2.239729393740329e-05, + "loss": 0.735, + "step": 15070 + }, + { + "epoch": 1.07, + "learning_rate": 2.2369648836928388e-05, + "loss": 0.7394, + "step": 15080 + }, + { + "epoch": 1.07, + "learning_rate": 2.2342006988476062e-05, + "loss": 0.6979, + "step": 15090 + }, + { + "epoch": 1.07, + "learning_rate": 2.231436842622118e-05, + "loss": 0.7178, + "step": 15100 + }, + { + "epoch": 1.07, + "learning_rate": 2.2286733184334564e-05, + "loss": 0.7372, + "step": 15110 + }, + { + "epoch": 1.07, + "learning_rate": 2.225910129698289e-05, + "loss": 0.7373, + "step": 15120 + }, + { + "epoch": 1.07, + "learning_rate": 2.223147279832874e-05, + "loss": 0.6994, + "step": 15130 + }, + { + "epoch": 1.07, + "learning_rate": 2.2203847722530476e-05, + "loss": 0.7149, + "step": 15140 + }, + { + "epoch": 1.07, + "learning_rate": 2.217622610374223e-05, + "loss": 0.7195, + "step": 15150 + }, + { + "epoch": 1.07, + "learning_rate": 2.2148607976113866e-05, + "loss": 0.7259, + "step": 15160 + }, + { + "epoch": 1.07, + "learning_rate": 2.2120993373790928e-05, + "loss": 0.7363, + "step": 15170 + }, + { + "epoch": 1.07, + "learning_rate": 2.20933823309146e-05, + "loss": 0.7158, + "step": 15180 + }, + { + "epoch": 1.08, + "learning_rate": 2.2065774881621673e-05, + "loss": 0.713, + "step": 15190 + }, + { + "epoch": 1.08, + "learning_rate": 2.2038171060044488e-05, + "loss": 0.7228, + "step": 15200 + }, + { + "epoch": 1.08, + "learning_rate": 2.20105709003109e-05, + "loss": 0.7034, + "step": 15210 + }, + { + "epoch": 1.08, + "learning_rate": 2.198297443654424e-05, + "loss": 0.732, + "step": 15220 + }, + { + "epoch": 1.08, + "learning_rate": 2.1955381702863275e-05, + "loss": 0.6914, + "step": 15230 + }, + { + "epoch": 1.08, + "learning_rate": 2.192779273338215e-05, + "loss": 0.7144, + "step": 15240 + }, + { + "epoch": 1.08, + "learning_rate": 2.190020756221036e-05, + "loss": 0.7084, + "step": 15250 + }, + { + "epoch": 1.08, + "learning_rate": 2.1872626223452708e-05, + "loss": 0.6972, + "step": 15260 + }, + { + "epoch": 1.08, + "learning_rate": 2.184504875120925e-05, + "loss": 0.7054, + "step": 15270 + }, + { + "epoch": 1.08, + "learning_rate": 2.1817475179575285e-05, + "loss": 0.6649, + "step": 15280 + }, + { + "epoch": 1.08, + "learning_rate": 2.178990554264124e-05, + "loss": 0.7261, + "step": 15290 + }, + { + "epoch": 1.08, + "learning_rate": 2.1762339874492732e-05, + "loss": 0.7163, + "step": 15300 + }, + { + "epoch": 1.08, + "learning_rate": 2.1734778209210437e-05, + "loss": 0.7242, + "step": 15310 + }, + { + "epoch": 1.08, + "learning_rate": 2.1707220580870115e-05, + "loss": 0.6934, + "step": 15320 + }, + { + "epoch": 1.09, + "learning_rate": 2.1679667023542483e-05, + "loss": 0.7318, + "step": 15330 + }, + { + "epoch": 1.09, + "learning_rate": 2.1652117571293273e-05, + "loss": 0.7051, + "step": 15340 + }, + { + "epoch": 1.09, + "learning_rate": 2.1624572258183113e-05, + "loss": 0.7365, + "step": 15350 + }, + { + "epoch": 1.09, + "learning_rate": 2.1597031118267546e-05, + "loss": 0.6866, + "step": 15360 + }, + { + "epoch": 1.09, + "learning_rate": 2.1569494185596904e-05, + "loss": 0.72, + "step": 15370 + }, + { + "epoch": 1.09, + "learning_rate": 2.1541961494216364e-05, + "loss": 0.7119, + "step": 15380 + }, + { + "epoch": 1.09, + "learning_rate": 2.151443307816584e-05, + "loss": 0.6931, + "step": 15390 + }, + { + "epoch": 1.09, + "learning_rate": 2.1486908971479967e-05, + "loss": 0.6874, + "step": 15400 + }, + { + "epoch": 1.09, + "learning_rate": 2.1459389208188044e-05, + "loss": 0.7406, + "step": 15410 + }, + { + "epoch": 1.09, + "learning_rate": 2.1431873822314e-05, + "loss": 0.6964, + "step": 15420 + }, + { + "epoch": 1.09, + "learning_rate": 2.1404362847876356e-05, + "loss": 0.686, + "step": 15430 + }, + { + "epoch": 1.09, + "learning_rate": 2.137685631888819e-05, + "loss": 0.739, + "step": 15440 + }, + { + "epoch": 1.09, + "learning_rate": 2.1349354269357063e-05, + "loss": 0.7261, + "step": 15450 + }, + { + "epoch": 1.09, + "learning_rate": 2.1321856733285004e-05, + "loss": 0.7069, + "step": 15460 + }, + { + "epoch": 1.1, + "learning_rate": 2.1294363744668476e-05, + "loss": 0.7227, + "step": 15470 + }, + { + "epoch": 1.1, + "learning_rate": 2.1266875337498306e-05, + "loss": 0.7378, + "step": 15480 + }, + { + "epoch": 1.1, + "learning_rate": 2.1239391545759653e-05, + "loss": 0.6962, + "step": 15490 + }, + { + "epoch": 1.1, + "learning_rate": 2.121191240343198e-05, + "loss": 0.6828, + "step": 15500 + }, + { + "epoch": 1.1, + "learning_rate": 2.1184437944489002e-05, + "loss": 0.7323, + "step": 15510 + }, + { + "epoch": 1.1, + "learning_rate": 2.1156968202898645e-05, + "loss": 0.7342, + "step": 15520 + }, + { + "epoch": 1.1, + "learning_rate": 2.1129503212622983e-05, + "loss": 0.7187, + "step": 15530 + }, + { + "epoch": 1.1, + "learning_rate": 2.1102043007618235e-05, + "loss": 0.7252, + "step": 15540 + }, + { + "epoch": 1.1, + "learning_rate": 2.1074587621834707e-05, + "loss": 0.6976, + "step": 15550 + }, + { + "epoch": 1.1, + "learning_rate": 2.104713708921673e-05, + "loss": 0.717, + "step": 15560 + }, + { + "epoch": 1.1, + "learning_rate": 2.1019691443702665e-05, + "loss": 0.6944, + "step": 15570 + }, + { + "epoch": 1.1, + "learning_rate": 2.0992250719224775e-05, + "loss": 0.7005, + "step": 15580 + }, + { + "epoch": 1.1, + "learning_rate": 2.09648149497093e-05, + "loss": 0.6812, + "step": 15590 + }, + { + "epoch": 1.1, + "learning_rate": 2.093738416907631e-05, + "loss": 0.7119, + "step": 15600 + }, + { + "epoch": 1.1, + "learning_rate": 2.0909958411239747e-05, + "loss": 0.7323, + "step": 15610 + }, + { + "epoch": 1.11, + "learning_rate": 2.08825377101073e-05, + "loss": 0.7042, + "step": 15620 + }, + { + "epoch": 1.11, + "learning_rate": 2.085512209958044e-05, + "loss": 0.7251, + "step": 15630 + }, + { + "epoch": 1.11, + "learning_rate": 2.0827711613554313e-05, + "loss": 0.7128, + "step": 15640 + }, + { + "epoch": 1.11, + "learning_rate": 2.080030628591777e-05, + "loss": 0.72, + "step": 15650 + }, + { + "epoch": 1.11, + "learning_rate": 2.077290615055325e-05, + "loss": 0.7159, + "step": 15660 + }, + { + "epoch": 1.11, + "learning_rate": 2.0745511241336787e-05, + "loss": 0.699, + "step": 15670 + }, + { + "epoch": 1.11, + "learning_rate": 2.0718121592137946e-05, + "loss": 0.7279, + "step": 15680 + }, + { + "epoch": 1.11, + "learning_rate": 2.0690737236819807e-05, + "loss": 0.7172, + "step": 15690 + }, + { + "epoch": 1.11, + "learning_rate": 2.0663358209238877e-05, + "loss": 0.7168, + "step": 15700 + }, + { + "epoch": 1.11, + "learning_rate": 2.0635984543245092e-05, + "loss": 0.7198, + "step": 15710 + }, + { + "epoch": 1.11, + "learning_rate": 2.0608616272681768e-05, + "loss": 0.7304, + "step": 15720 + }, + { + "epoch": 1.11, + "learning_rate": 2.0581253431385546e-05, + "loss": 0.7136, + "step": 15730 + }, + { + "epoch": 1.11, + "learning_rate": 2.055389605318633e-05, + "loss": 0.7061, + "step": 15740 + }, + { + "epoch": 1.11, + "learning_rate": 2.0526544171907293e-05, + "loss": 0.7266, + "step": 15750 + }, + { + "epoch": 1.12, + "learning_rate": 2.0499197821364813e-05, + "loss": 0.6983, + "step": 15760 + }, + { + "epoch": 1.12, + "learning_rate": 2.0471857035368435e-05, + "loss": 0.7496, + "step": 15770 + }, + { + "epoch": 1.12, + "learning_rate": 2.0444521847720797e-05, + "loss": 0.7285, + "step": 15780 + }, + { + "epoch": 1.12, + "learning_rate": 2.0417192292217632e-05, + "loss": 0.7089, + "step": 15790 + }, + { + "epoch": 1.12, + "learning_rate": 2.0389868402647725e-05, + "loss": 0.7189, + "step": 15800 + }, + { + "epoch": 1.12, + "learning_rate": 2.0362550212792837e-05, + "loss": 0.7422, + "step": 15810 + }, + { + "epoch": 1.12, + "learning_rate": 2.033523775642768e-05, + "loss": 0.7565, + "step": 15820 + }, + { + "epoch": 1.12, + "learning_rate": 2.030793106731988e-05, + "loss": 0.7099, + "step": 15830 + }, + { + "epoch": 1.12, + "learning_rate": 2.0280630179229948e-05, + "loss": 0.7139, + "step": 15840 + }, + { + "epoch": 1.12, + "learning_rate": 2.0253335125911204e-05, + "loss": 0.7106, + "step": 15850 + }, + { + "epoch": 1.12, + "learning_rate": 2.022604594110978e-05, + "loss": 0.7057, + "step": 15860 + }, + { + "epoch": 1.12, + "learning_rate": 2.0198762658564505e-05, + "loss": 0.7363, + "step": 15870 + }, + { + "epoch": 1.12, + "learning_rate": 2.0171485312006962e-05, + "loss": 0.6854, + "step": 15880 + }, + { + "epoch": 1.12, + "learning_rate": 2.0144213935161353e-05, + "loss": 0.7171, + "step": 15890 + }, + { + "epoch": 1.13, + "learning_rate": 2.0116948561744548e-05, + "loss": 0.7322, + "step": 15900 + }, + { + "epoch": 1.13, + "learning_rate": 2.0089689225465942e-05, + "loss": 0.7034, + "step": 15910 + }, + { + "epoch": 1.13, + "learning_rate": 2.0062435960027497e-05, + "loss": 0.7279, + "step": 15920 + }, + { + "epoch": 1.13, + "learning_rate": 2.0035188799123657e-05, + "loss": 0.6928, + "step": 15930 + }, + { + "epoch": 1.13, + "learning_rate": 2.0007947776441344e-05, + "loss": 0.7158, + "step": 15940 + }, + { + "epoch": 1.13, + "learning_rate": 1.9980712925659854e-05, + "loss": 0.7355, + "step": 15950 + }, + { + "epoch": 1.13, + "learning_rate": 1.9953484280450865e-05, + "loss": 0.7238, + "step": 15960 + }, + { + "epoch": 1.13, + "learning_rate": 1.9926261874478403e-05, + "loss": 0.7005, + "step": 15970 + }, + { + "epoch": 1.13, + "learning_rate": 1.9899045741398764e-05, + "loss": 0.7617, + "step": 15980 + }, + { + "epoch": 1.13, + "learning_rate": 1.9871835914860473e-05, + "loss": 0.7366, + "step": 15990 + }, + { + "epoch": 1.13, + "learning_rate": 1.9844632428504282e-05, + "loss": 0.7069, + "step": 16000 + }, + { + "epoch": 1.13, + "learning_rate": 1.98174353159631e-05, + "loss": 0.7133, + "step": 16010 + }, + { + "epoch": 1.13, + "learning_rate": 1.9790244610861956e-05, + "loss": 0.6999, + "step": 16020 + }, + { + "epoch": 1.13, + "learning_rate": 1.9763060346817946e-05, + "loss": 0.7152, + "step": 16030 + }, + { + "epoch": 1.14, + "learning_rate": 1.97358825574402e-05, + "loss": 0.7114, + "step": 16040 + }, + { + "epoch": 1.14, + "learning_rate": 1.9708711276329876e-05, + "loss": 0.7118, + "step": 16050 + }, + { + "epoch": 1.14, + "learning_rate": 1.968154653708005e-05, + "loss": 0.6991, + "step": 16060 + }, + { + "epoch": 1.14, + "learning_rate": 1.9654388373275724e-05, + "loss": 0.716, + "step": 16070 + }, + { + "epoch": 1.14, + "learning_rate": 1.9627236818493757e-05, + "loss": 0.7283, + "step": 16080 + }, + { + "epoch": 1.14, + "learning_rate": 1.9600091906302866e-05, + "loss": 0.6877, + "step": 16090 + }, + { + "epoch": 1.14, + "learning_rate": 1.9572953670263543e-05, + "loss": 0.6961, + "step": 16100 + }, + { + "epoch": 1.14, + "learning_rate": 1.9545822143927996e-05, + "loss": 0.705, + "step": 16110 + }, + { + "epoch": 1.14, + "learning_rate": 1.9518697360840184e-05, + "loss": 0.7358, + "step": 16120 + }, + { + "epoch": 1.14, + "learning_rate": 1.9491579354535704e-05, + "loss": 0.7076, + "step": 16130 + }, + { + "epoch": 1.14, + "learning_rate": 1.946446815854177e-05, + "loss": 0.7408, + "step": 16140 + }, + { + "epoch": 1.14, + "learning_rate": 1.9437363806377202e-05, + "loss": 0.7195, + "step": 16150 + }, + { + "epoch": 1.14, + "learning_rate": 1.9410266331552324e-05, + "loss": 0.707, + "step": 16160 + }, + { + "epoch": 1.14, + "learning_rate": 1.9383175767568974e-05, + "loss": 0.709, + "step": 16170 + }, + { + "epoch": 1.15, + "learning_rate": 1.935609214792046e-05, + "loss": 0.7466, + "step": 16180 + }, + { + "epoch": 1.15, + "learning_rate": 1.932901550609149e-05, + "loss": 0.7404, + "step": 16190 + }, + { + "epoch": 1.15, + "learning_rate": 1.9301945875558136e-05, + "loss": 0.7121, + "step": 16200 + }, + { + "epoch": 1.15, + "learning_rate": 1.9274883289787807e-05, + "loss": 0.7256, + "step": 16210 + }, + { + "epoch": 1.15, + "learning_rate": 1.924782778223922e-05, + "loss": 0.6996, + "step": 16220 + }, + { + "epoch": 1.15, + "learning_rate": 1.922077938636233e-05, + "loss": 0.7491, + "step": 16230 + }, + { + "epoch": 1.15, + "learning_rate": 1.919373813559828e-05, + "loss": 0.7379, + "step": 16240 + }, + { + "epoch": 1.15, + "learning_rate": 1.9166704063379398e-05, + "loss": 0.711, + "step": 16250 + }, + { + "epoch": 1.15, + "learning_rate": 1.9139677203129146e-05, + "loss": 0.7174, + "step": 16260 + }, + { + "epoch": 1.15, + "learning_rate": 1.9112657588262064e-05, + "loss": 0.7062, + "step": 16270 + }, + { + "epoch": 1.15, + "learning_rate": 1.9085645252183716e-05, + "loss": 0.7164, + "step": 16280 + }, + { + "epoch": 1.15, + "learning_rate": 1.905864022829067e-05, + "loss": 0.6892, + "step": 16290 + }, + { + "epoch": 1.15, + "learning_rate": 1.9031642549970484e-05, + "loss": 0.7483, + "step": 16300 + }, + { + "epoch": 1.15, + "learning_rate": 1.9004652250601612e-05, + "loss": 0.7138, + "step": 16310 + }, + { + "epoch": 1.16, + "learning_rate": 1.897766936355337e-05, + "loss": 0.7318, + "step": 16320 + }, + { + "epoch": 1.16, + "learning_rate": 1.8950693922185938e-05, + "loss": 0.7191, + "step": 16330 + }, + { + "epoch": 1.16, + "learning_rate": 1.892372595985028e-05, + "loss": 0.7121, + "step": 16340 + }, + { + "epoch": 1.16, + "learning_rate": 1.8896765509888114e-05, + "loss": 0.6814, + "step": 16350 + }, + { + "epoch": 1.16, + "learning_rate": 1.8869812605631854e-05, + "loss": 0.7087, + "step": 16360 + }, + { + "epoch": 1.16, + "learning_rate": 1.8842867280404614e-05, + "loss": 0.7421, + "step": 16370 + }, + { + "epoch": 1.16, + "learning_rate": 1.8815929567520118e-05, + "loss": 0.7249, + "step": 16380 + }, + { + "epoch": 1.16, + "learning_rate": 1.878899950028269e-05, + "loss": 0.7133, + "step": 16390 + }, + { + "epoch": 1.16, + "learning_rate": 1.876207711198718e-05, + "loss": 0.7258, + "step": 16400 + }, + { + "epoch": 1.16, + "learning_rate": 1.873516243591897e-05, + "loss": 0.7109, + "step": 16410 + }, + { + "epoch": 1.16, + "learning_rate": 1.870825550535389e-05, + "loss": 0.7226, + "step": 16420 + }, + { + "epoch": 1.16, + "learning_rate": 1.8681356353558203e-05, + "loss": 0.7491, + "step": 16430 + }, + { + "epoch": 1.16, + "learning_rate": 1.8654465013788565e-05, + "loss": 0.7171, + "step": 16440 + }, + { + "epoch": 1.16, + "learning_rate": 1.862758151929194e-05, + "loss": 0.7179, + "step": 16450 + }, + { + "epoch": 1.17, + "learning_rate": 1.860070590330562e-05, + "loss": 0.6968, + "step": 16460 + }, + { + "epoch": 1.17, + "learning_rate": 1.857383819905715e-05, + "loss": 0.6621, + "step": 16470 + }, + { + "epoch": 1.17, + "learning_rate": 1.85469784397643e-05, + "loss": 0.7086, + "step": 16480 + }, + { + "epoch": 1.17, + "learning_rate": 1.8520126658635e-05, + "loss": 0.747, + "step": 16490 + }, + { + "epoch": 1.17, + "learning_rate": 1.849328288886732e-05, + "loss": 0.7053, + "step": 16500 + }, + { + "epoch": 1.17, + "learning_rate": 1.8466447163649447e-05, + "loss": 0.7356, + "step": 16510 + }, + { + "epoch": 1.17, + "learning_rate": 1.8439619516159605e-05, + "loss": 0.7242, + "step": 16520 + }, + { + "epoch": 1.17, + "learning_rate": 1.841279997956602e-05, + "loss": 0.7214, + "step": 16530 + }, + { + "epoch": 1.17, + "learning_rate": 1.8385988587026908e-05, + "loss": 0.7189, + "step": 16540 + }, + { + "epoch": 1.17, + "learning_rate": 1.8359185371690418e-05, + "loss": 0.7264, + "step": 16550 + }, + { + "epoch": 1.17, + "learning_rate": 1.8332390366694587e-05, + "loss": 0.7173, + "step": 16560 + }, + { + "epoch": 1.17, + "learning_rate": 1.8305603605167268e-05, + "loss": 0.7327, + "step": 16570 + }, + { + "epoch": 1.17, + "learning_rate": 1.827882512022618e-05, + "loss": 0.6935, + "step": 16580 + }, + { + "epoch": 1.17, + "learning_rate": 1.825205494497877e-05, + "loss": 0.7185, + "step": 16590 + }, + { + "epoch": 1.17, + "learning_rate": 1.8225293112522222e-05, + "loss": 0.7138, + "step": 16600 + }, + { + "epoch": 1.18, + "learning_rate": 1.819853965594339e-05, + "loss": 0.6779, + "step": 16610 + }, + { + "epoch": 1.18, + "learning_rate": 1.8171794608318813e-05, + "loss": 0.7251, + "step": 16620 + }, + { + "epoch": 1.18, + "learning_rate": 1.8145058002714587e-05, + "loss": 0.7461, + "step": 16630 + }, + { + "epoch": 1.18, + "learning_rate": 1.8118329872186412e-05, + "loss": 0.7335, + "step": 16640 + }, + { + "epoch": 1.18, + "learning_rate": 1.809161024977946e-05, + "loss": 0.6869, + "step": 16650 + }, + { + "epoch": 1.18, + "learning_rate": 1.8064899168528438e-05, + "loss": 0.7236, + "step": 16660 + }, + { + "epoch": 1.18, + "learning_rate": 1.8038196661457456e-05, + "loss": 0.7197, + "step": 16670 + }, + { + "epoch": 1.18, + "learning_rate": 1.8011502761580056e-05, + "loss": 0.6936, + "step": 16680 + }, + { + "epoch": 1.18, + "learning_rate": 1.7984817501899084e-05, + "loss": 0.7115, + "step": 16690 + }, + { + "epoch": 1.18, + "learning_rate": 1.7958140915406764e-05, + "loss": 0.7299, + "step": 16700 + }, + { + "epoch": 1.18, + "learning_rate": 1.793147303508456e-05, + "loss": 0.7225, + "step": 16710 + }, + { + "epoch": 1.18, + "learning_rate": 1.7904813893903194e-05, + "loss": 0.7156, + "step": 16720 + }, + { + "epoch": 1.18, + "learning_rate": 1.7878163524822566e-05, + "loss": 0.7347, + "step": 16730 + }, + { + "epoch": 1.18, + "learning_rate": 1.785152196079174e-05, + "loss": 0.7126, + "step": 16740 + }, + { + "epoch": 1.19, + "learning_rate": 1.7824889234748875e-05, + "loss": 0.6867, + "step": 16750 + }, + { + "epoch": 1.19, + "learning_rate": 1.7798265379621244e-05, + "loss": 0.7394, + "step": 16760 + }, + { + "epoch": 1.19, + "learning_rate": 1.777165042832512e-05, + "loss": 0.6824, + "step": 16770 + }, + { + "epoch": 1.19, + "learning_rate": 1.7745044413765766e-05, + "loss": 0.7436, + "step": 16780 + }, + { + "epoch": 1.19, + "learning_rate": 1.7718447368837415e-05, + "loss": 0.7273, + "step": 16790 + }, + { + "epoch": 1.19, + "learning_rate": 1.7691859326423198e-05, + "loss": 0.7286, + "step": 16800 + }, + { + "epoch": 1.19, + "learning_rate": 1.766528031939513e-05, + "loss": 0.7027, + "step": 16810 + }, + { + "epoch": 1.19, + "learning_rate": 1.7638710380614016e-05, + "loss": 0.7411, + "step": 16820 + }, + { + "epoch": 1.19, + "learning_rate": 1.7612149542929506e-05, + "loss": 0.7129, + "step": 16830 + }, + { + "epoch": 1.19, + "learning_rate": 1.758559783917996e-05, + "loss": 0.7052, + "step": 16840 + }, + { + "epoch": 1.19, + "learning_rate": 1.7559055302192458e-05, + "loss": 0.6975, + "step": 16850 + }, + { + "epoch": 1.19, + "learning_rate": 1.753252196478273e-05, + "loss": 0.7084, + "step": 16860 + }, + { + "epoch": 1.19, + "learning_rate": 1.7505997859755162e-05, + "loss": 0.7196, + "step": 16870 + }, + { + "epoch": 1.19, + "learning_rate": 1.7479483019902697e-05, + "loss": 0.7339, + "step": 16880 + }, + { + "epoch": 1.2, + "learning_rate": 1.745297747800686e-05, + "loss": 0.7055, + "step": 16890 + }, + { + "epoch": 1.2, + "learning_rate": 1.742648126683762e-05, + "loss": 0.7098, + "step": 16900 + }, + { + "epoch": 1.2, + "learning_rate": 1.739999441915347e-05, + "loss": 0.7118, + "step": 16910 + }, + { + "epoch": 1.2, + "learning_rate": 1.737351696770129e-05, + "loss": 0.7336, + "step": 16920 + }, + { + "epoch": 1.2, + "learning_rate": 1.734704894521637e-05, + "loss": 0.68, + "step": 16930 + }, + { + "epoch": 1.2, + "learning_rate": 1.7320590384422316e-05, + "loss": 0.7092, + "step": 16940 + }, + { + "epoch": 1.2, + "learning_rate": 1.7294141318031053e-05, + "loss": 0.7029, + "step": 16950 + }, + { + "epoch": 1.2, + "learning_rate": 1.7267701778742752e-05, + "loss": 0.7408, + "step": 16960 + }, + { + "epoch": 1.2, + "learning_rate": 1.724127179924584e-05, + "loss": 0.7186, + "step": 16970 + }, + { + "epoch": 1.2, + "learning_rate": 1.7214851412216877e-05, + "loss": 0.716, + "step": 16980 + }, + { + "epoch": 1.2, + "learning_rate": 1.7188440650320596e-05, + "loss": 0.7324, + "step": 16990 + }, + { + "epoch": 1.2, + "learning_rate": 1.716203954620982e-05, + "loss": 0.7048, + "step": 17000 + }, + { + "epoch": 1.2, + "learning_rate": 1.7135648132525434e-05, + "loss": 0.7059, + "step": 17010 + }, + { + "epoch": 1.2, + "learning_rate": 1.7109266441896346e-05, + "loss": 0.7062, + "step": 17020 + }, + { + "epoch": 1.21, + "learning_rate": 1.7082894506939423e-05, + "loss": 0.7157, + "step": 17030 + }, + { + "epoch": 1.21, + "learning_rate": 1.7056532360259504e-05, + "loss": 0.72, + "step": 17040 + }, + { + "epoch": 1.21, + "learning_rate": 1.7030180034449294e-05, + "loss": 0.7024, + "step": 17050 + }, + { + "epoch": 1.21, + "learning_rate": 1.700383756208938e-05, + "loss": 0.7169, + "step": 17060 + }, + { + "epoch": 1.21, + "learning_rate": 1.6977504975748147e-05, + "loss": 0.7279, + "step": 17070 + }, + { + "epoch": 1.21, + "learning_rate": 1.695118230798177e-05, + "loss": 0.6765, + "step": 17080 + }, + { + "epoch": 1.21, + "learning_rate": 1.6924869591334168e-05, + "loss": 0.716, + "step": 17090 + }, + { + "epoch": 1.21, + "learning_rate": 1.6898566858336942e-05, + "loss": 0.7001, + "step": 17100 + }, + { + "epoch": 1.21, + "learning_rate": 1.6872274141509342e-05, + "loss": 0.7301, + "step": 17110 + }, + { + "epoch": 1.21, + "learning_rate": 1.6845991473358264e-05, + "loss": 0.6949, + "step": 17120 + }, + { + "epoch": 1.21, + "learning_rate": 1.681971888637815e-05, + "loss": 0.7389, + "step": 17130 + }, + { + "epoch": 1.21, + "learning_rate": 1.6793456413051016e-05, + "loss": 0.7423, + "step": 17140 + }, + { + "epoch": 1.21, + "learning_rate": 1.6767204085846324e-05, + "loss": 0.7027, + "step": 17150 + }, + { + "epoch": 1.21, + "learning_rate": 1.674096193722103e-05, + "loss": 0.7062, + "step": 17160 + }, + { + "epoch": 1.22, + "learning_rate": 1.671472999961949e-05, + "loss": 0.722, + "step": 17170 + }, + { + "epoch": 1.22, + "learning_rate": 1.668850830547345e-05, + "loss": 0.7278, + "step": 17180 + }, + { + "epoch": 1.22, + "learning_rate": 1.6662296887201967e-05, + "loss": 0.7161, + "step": 17190 + }, + { + "epoch": 1.22, + "learning_rate": 1.6636095777211413e-05, + "loss": 0.7267, + "step": 17200 + }, + { + "epoch": 1.22, + "learning_rate": 1.660990500789541e-05, + "loss": 0.7356, + "step": 17210 + }, + { + "epoch": 1.22, + "learning_rate": 1.6583724611634804e-05, + "loss": 0.7245, + "step": 17220 + }, + { + "epoch": 1.22, + "learning_rate": 1.6557554620797596e-05, + "loss": 0.6979, + "step": 17230 + }, + { + "epoch": 1.22, + "learning_rate": 1.6531395067738934e-05, + "loss": 0.6995, + "step": 17240 + }, + { + "epoch": 1.22, + "learning_rate": 1.650524598480106e-05, + "loss": 0.72, + "step": 17250 + }, + { + "epoch": 1.22, + "learning_rate": 1.647910740431329e-05, + "loss": 0.7217, + "step": 17260 + }, + { + "epoch": 1.22, + "learning_rate": 1.645297935859192e-05, + "loss": 0.7191, + "step": 17270 + }, + { + "epoch": 1.22, + "learning_rate": 1.6426861879940235e-05, + "loss": 0.7095, + "step": 17280 + }, + { + "epoch": 1.22, + "learning_rate": 1.640075500064848e-05, + "loss": 0.7315, + "step": 17290 + }, + { + "epoch": 1.22, + "learning_rate": 1.637465875299376e-05, + "loss": 0.7221, + "step": 17300 + }, + { + "epoch": 1.23, + "learning_rate": 1.634857316924006e-05, + "loss": 0.7424, + "step": 17310 + }, + { + "epoch": 1.23, + "learning_rate": 1.632249828163816e-05, + "loss": 0.7475, + "step": 17320 + }, + { + "epoch": 1.23, + "learning_rate": 1.6296434122425638e-05, + "loss": 0.7208, + "step": 17330 + }, + { + "epoch": 1.23, + "learning_rate": 1.627038072382679e-05, + "loss": 0.7181, + "step": 17340 + }, + { + "epoch": 1.23, + "learning_rate": 1.6244338118052632e-05, + "loss": 0.7212, + "step": 17350 + }, + { + "epoch": 1.23, + "learning_rate": 1.621830633730079e-05, + "loss": 0.7071, + "step": 17360 + }, + { + "epoch": 1.23, + "learning_rate": 1.6192285413755564e-05, + "loss": 0.7225, + "step": 17370 + }, + { + "epoch": 1.23, + "learning_rate": 1.6166275379587786e-05, + "loss": 0.717, + "step": 17380 + }, + { + "epoch": 1.23, + "learning_rate": 1.6140276266954864e-05, + "loss": 0.7502, + "step": 17390 + }, + { + "epoch": 1.23, + "learning_rate": 1.611428810800065e-05, + "loss": 0.7212, + "step": 17400 + }, + { + "epoch": 1.23, + "learning_rate": 1.608831093485551e-05, + "loss": 0.7458, + "step": 17410 + }, + { + "epoch": 1.23, + "learning_rate": 1.606234477963619e-05, + "loss": 0.7114, + "step": 17420 + }, + { + "epoch": 1.23, + "learning_rate": 1.6036389674445838e-05, + "loss": 0.7317, + "step": 17430 + }, + { + "epoch": 1.23, + "learning_rate": 1.6010445651373918e-05, + "loss": 0.7232, + "step": 17440 + }, + { + "epoch": 1.24, + "learning_rate": 1.598451274249621e-05, + "loss": 0.6824, + "step": 17450 + }, + { + "epoch": 1.24, + "learning_rate": 1.5958590979874733e-05, + "loss": 0.6917, + "step": 17460 + }, + { + "epoch": 1.24, + "learning_rate": 1.5932680395557765e-05, + "loss": 0.7172, + "step": 17470 + }, + { + "epoch": 1.24, + "learning_rate": 1.590678102157972e-05, + "loss": 0.7299, + "step": 17480 + }, + { + "epoch": 1.24, + "learning_rate": 1.5880892889961164e-05, + "loss": 0.7067, + "step": 17490 + }, + { + "epoch": 1.24, + "learning_rate": 1.5855016032708787e-05, + "loss": 0.7049, + "step": 17500 + }, + { + "epoch": 1.24, + "learning_rate": 1.582915048181532e-05, + "loss": 0.703, + "step": 17510 + }, + { + "epoch": 1.24, + "learning_rate": 1.5803296269259503e-05, + "loss": 0.7269, + "step": 17520 + }, + { + "epoch": 1.24, + "learning_rate": 1.5777453427006084e-05, + "loss": 0.6889, + "step": 17530 + }, + { + "epoch": 1.24, + "learning_rate": 1.5751621987005742e-05, + "loss": 0.7249, + "step": 17540 + }, + { + "epoch": 1.24, + "learning_rate": 1.5725801981195062e-05, + "loss": 0.7072, + "step": 17550 + }, + { + "epoch": 1.24, + "learning_rate": 1.569999344149648e-05, + "loss": 0.7059, + "step": 17560 + }, + { + "epoch": 1.24, + "learning_rate": 1.567419639981827e-05, + "loss": 0.7149, + "step": 17570 + }, + { + "epoch": 1.24, + "learning_rate": 1.5648410888054487e-05, + "loss": 0.7222, + "step": 17580 + }, + { + "epoch": 1.25, + "learning_rate": 1.5622636938084927e-05, + "loss": 0.7139, + "step": 17590 + }, + { + "epoch": 1.25, + "learning_rate": 1.5596874581775112e-05, + "loss": 0.722, + "step": 17600 + }, + { + "epoch": 1.25, + "learning_rate": 1.5571123850976184e-05, + "loss": 0.6979, + "step": 17610 + }, + { + "epoch": 1.25, + "learning_rate": 1.5545384777524958e-05, + "loss": 0.7257, + "step": 17620 + }, + { + "epoch": 1.25, + "learning_rate": 1.551965739324381e-05, + "loss": 0.7112, + "step": 17630 + }, + { + "epoch": 1.25, + "learning_rate": 1.549394172994069e-05, + "loss": 0.7098, + "step": 17640 + }, + { + "epoch": 1.25, + "learning_rate": 1.5468237819409028e-05, + "loss": 0.7094, + "step": 17650 + }, + { + "epoch": 1.25, + "learning_rate": 1.5442545693427733e-05, + "loss": 0.7317, + "step": 17660 + }, + { + "epoch": 1.25, + "learning_rate": 1.5416865383761147e-05, + "loss": 0.6859, + "step": 17670 + }, + { + "epoch": 1.25, + "learning_rate": 1.539119692215902e-05, + "loss": 0.7187, + "step": 17680 + }, + { + "epoch": 1.25, + "learning_rate": 1.5365540340356415e-05, + "loss": 0.7159, + "step": 17690 + }, + { + "epoch": 1.25, + "learning_rate": 1.533989567007374e-05, + "loss": 0.6882, + "step": 17700 + }, + { + "epoch": 1.25, + "learning_rate": 1.5314262943016654e-05, + "loss": 0.6969, + "step": 17710 + }, + { + "epoch": 1.25, + "learning_rate": 1.5288642190876086e-05, + "loss": 0.6984, + "step": 17720 + }, + { + "epoch": 1.25, + "learning_rate": 1.526303344532811e-05, + "loss": 0.7349, + "step": 17730 + }, + { + "epoch": 1.26, + "learning_rate": 1.5237436738033984e-05, + "loss": 0.7341, + "step": 17740 + }, + { + "epoch": 1.26, + "learning_rate": 1.5211852100640095e-05, + "loss": 0.7143, + "step": 17750 + }, + { + "epoch": 1.26, + "learning_rate": 1.5186279564777883e-05, + "loss": 0.7081, + "step": 17760 + }, + { + "epoch": 1.26, + "learning_rate": 1.516071916206383e-05, + "loss": 0.6913, + "step": 17770 + }, + { + "epoch": 1.26, + "learning_rate": 1.5135170924099423e-05, + "loss": 0.7063, + "step": 17780 + }, + { + "epoch": 1.26, + "learning_rate": 1.5109634882471118e-05, + "loss": 0.7095, + "step": 17790 + }, + { + "epoch": 1.26, + "learning_rate": 1.5084111068750283e-05, + "loss": 0.7047, + "step": 17800 + }, + { + "epoch": 1.26, + "learning_rate": 1.5058599514493158e-05, + "loss": 0.7433, + "step": 17810 + }, + { + "epoch": 1.26, + "learning_rate": 1.5033100251240833e-05, + "loss": 0.6966, + "step": 17820 + }, + { + "epoch": 1.26, + "learning_rate": 1.500761331051922e-05, + "loss": 0.7162, + "step": 17830 + }, + { + "epoch": 1.26, + "learning_rate": 1.4982138723838973e-05, + "loss": 0.7126, + "step": 17840 + }, + { + "epoch": 1.26, + "learning_rate": 1.4956676522695478e-05, + "loss": 0.6977, + "step": 17850 + }, + { + "epoch": 1.26, + "learning_rate": 1.493122673856881e-05, + "loss": 0.6931, + "step": 17860 + }, + { + "epoch": 1.26, + "learning_rate": 1.4905789402923697e-05, + "loss": 0.7089, + "step": 17870 + }, + { + "epoch": 1.27, + "learning_rate": 1.4880364547209466e-05, + "loss": 0.7247, + "step": 17880 + }, + { + "epoch": 1.27, + "learning_rate": 1.4854952202860033e-05, + "loss": 0.7037, + "step": 17890 + }, + { + "epoch": 1.27, + "learning_rate": 1.4829552401293822e-05, + "loss": 0.7011, + "step": 17900 + }, + { + "epoch": 1.27, + "learning_rate": 1.4804165173913764e-05, + "loss": 0.7118, + "step": 17910 + }, + { + "epoch": 1.27, + "learning_rate": 1.4778790552107236e-05, + "loss": 0.6924, + "step": 17920 + }, + { + "epoch": 1.27, + "learning_rate": 1.4753428567246052e-05, + "loss": 0.72, + "step": 17930 + }, + { + "epoch": 1.27, + "learning_rate": 1.4728079250686366e-05, + "loss": 0.7124, + "step": 17940 + }, + { + "epoch": 1.27, + "learning_rate": 1.470274263376869e-05, + "loss": 0.7015, + "step": 17950 + }, + { + "epoch": 1.27, + "learning_rate": 1.4677418747817847e-05, + "loss": 0.7289, + "step": 17960 + }, + { + "epoch": 1.27, + "learning_rate": 1.4652107624142908e-05, + "loss": 0.709, + "step": 17970 + }, + { + "epoch": 1.27, + "learning_rate": 1.4626809294037147e-05, + "loss": 0.7018, + "step": 17980 + }, + { + "epoch": 1.27, + "learning_rate": 1.4601523788778043e-05, + "loss": 0.7282, + "step": 17990 + }, + { + "epoch": 1.27, + "learning_rate": 1.4576251139627222e-05, + "loss": 0.6876, + "step": 18000 + }, + { + "epoch": 1.27, + "learning_rate": 1.4550991377830426e-05, + "loss": 0.7062, + "step": 18010 + }, + { + "epoch": 1.28, + "learning_rate": 1.4525744534617402e-05, + "loss": 0.7015, + "step": 18020 + }, + { + "epoch": 1.28, + "learning_rate": 1.450051064120199e-05, + "loss": 0.7316, + "step": 18030 + }, + { + "epoch": 1.28, + "learning_rate": 1.4475289728782e-05, + "loss": 0.7131, + "step": 18040 + }, + { + "epoch": 1.28, + "learning_rate": 1.4450081828539208e-05, + "loss": 0.7294, + "step": 18050 + }, + { + "epoch": 1.28, + "learning_rate": 1.442488697163925e-05, + "loss": 0.7204, + "step": 18060 + }, + { + "epoch": 1.28, + "learning_rate": 1.4399705189231691e-05, + "loss": 0.7443, + "step": 18070 + }, + { + "epoch": 1.28, + "learning_rate": 1.437453651244991e-05, + "loss": 0.6726, + "step": 18080 + }, + { + "epoch": 1.28, + "learning_rate": 1.4349380972411092e-05, + "loss": 0.7047, + "step": 18090 + }, + { + "epoch": 1.28, + "learning_rate": 1.4324238600216167e-05, + "loss": 0.7131, + "step": 18100 + }, + { + "epoch": 1.28, + "learning_rate": 1.4299109426949784e-05, + "loss": 0.7373, + "step": 18110 + }, + { + "epoch": 1.28, + "learning_rate": 1.4273993483680287e-05, + "loss": 0.7337, + "step": 18120 + }, + { + "epoch": 1.28, + "learning_rate": 1.4248890801459664e-05, + "loss": 0.7014, + "step": 18130 + }, + { + "epoch": 1.28, + "learning_rate": 1.4223801411323497e-05, + "loss": 0.7327, + "step": 18140 + }, + { + "epoch": 1.28, + "learning_rate": 1.4198725344290928e-05, + "loss": 0.7178, + "step": 18150 + }, + { + "epoch": 1.29, + "learning_rate": 1.4173662631364643e-05, + "loss": 0.7035, + "step": 18160 + }, + { + "epoch": 1.29, + "learning_rate": 1.4148613303530822e-05, + "loss": 0.7009, + "step": 18170 + }, + { + "epoch": 1.29, + "learning_rate": 1.4123577391759083e-05, + "loss": 0.6923, + "step": 18180 + }, + { + "epoch": 1.29, + "learning_rate": 1.4098554927002444e-05, + "loss": 0.6946, + "step": 18190 + }, + { + "epoch": 1.29, + "learning_rate": 1.4073545940197325e-05, + "loss": 0.7287, + "step": 18200 + }, + { + "epoch": 1.29, + "learning_rate": 1.4048550462263482e-05, + "loss": 0.6951, + "step": 18210 + }, + { + "epoch": 1.29, + "learning_rate": 1.4023568524103953e-05, + "loss": 0.7234, + "step": 18220 + }, + { + "epoch": 1.29, + "learning_rate": 1.399860015660503e-05, + "loss": 0.6795, + "step": 18230 + }, + { + "epoch": 1.29, + "learning_rate": 1.3973645390636248e-05, + "loss": 0.7257, + "step": 18240 + }, + { + "epoch": 1.29, + "learning_rate": 1.3948704257050315e-05, + "loss": 0.7613, + "step": 18250 + }, + { + "epoch": 1.29, + "learning_rate": 1.3923776786683118e-05, + "loss": 0.6848, + "step": 18260 + }, + { + "epoch": 1.29, + "learning_rate": 1.3898863010353569e-05, + "loss": 0.7101, + "step": 18270 + }, + { + "epoch": 1.29, + "learning_rate": 1.3873962958863723e-05, + "loss": 0.7361, + "step": 18280 + }, + { + "epoch": 1.29, + "learning_rate": 1.3849076662998648e-05, + "loss": 0.7305, + "step": 18290 + }, + { + "epoch": 1.3, + "learning_rate": 1.3824204153526407e-05, + "loss": 0.7449, + "step": 18300 + }, + { + "epoch": 1.3, + "learning_rate": 1.3799345461198006e-05, + "loss": 0.7034, + "step": 18310 + }, + { + "epoch": 1.3, + "learning_rate": 1.3774500616747366e-05, + "loss": 0.6939, + "step": 18320 + }, + { + "epoch": 1.3, + "learning_rate": 1.3749669650891306e-05, + "loss": 0.7017, + "step": 18330 + }, + { + "epoch": 1.3, + "learning_rate": 1.3724852594329482e-05, + "loss": 0.7159, + "step": 18340 + }, + { + "epoch": 1.3, + "learning_rate": 1.3700049477744343e-05, + "loss": 0.695, + "step": 18350 + }, + { + "epoch": 1.3, + "learning_rate": 1.3675260331801093e-05, + "loss": 0.7316, + "step": 18360 + }, + { + "epoch": 1.3, + "learning_rate": 1.3650485187147694e-05, + "loss": 0.7337, + "step": 18370 + }, + { + "epoch": 1.3, + "learning_rate": 1.3625724074414792e-05, + "loss": 0.7116, + "step": 18380 + }, + { + "epoch": 1.3, + "learning_rate": 1.3600977024215658e-05, + "loss": 0.7163, + "step": 18390 + }, + { + "epoch": 1.3, + "learning_rate": 1.3576244067146193e-05, + "loss": 0.7016, + "step": 18400 + }, + { + "epoch": 1.3, + "learning_rate": 1.3551525233784879e-05, + "loss": 0.7304, + "step": 18410 + }, + { + "epoch": 1.3, + "learning_rate": 1.3526820554692743e-05, + "loss": 0.6948, + "step": 18420 + }, + { + "epoch": 1.3, + "learning_rate": 1.3502130060413293e-05, + "loss": 0.7157, + "step": 18430 + }, + { + "epoch": 1.31, + "learning_rate": 1.34774537814725e-05, + "loss": 0.7297, + "step": 18440 + }, + { + "epoch": 1.31, + "learning_rate": 1.3452791748378767e-05, + "loss": 0.7092, + "step": 18450 + }, + { + "epoch": 1.31, + "learning_rate": 1.3428143991622902e-05, + "loss": 0.728, + "step": 18460 + }, + { + "epoch": 1.31, + "learning_rate": 1.3403510541678055e-05, + "loss": 0.7247, + "step": 18470 + }, + { + "epoch": 1.31, + "learning_rate": 1.3381352694222871e-05, + "loss": 0.7027, + "step": 18480 + }, + { + "epoch": 1.31, + "learning_rate": 1.3356746511109036e-05, + "loss": 0.7078, + "step": 18490 + }, + { + "epoch": 1.31, + "learning_rate": 1.3332154723078139e-05, + "loss": 0.7383, + "step": 18500 + }, + { + "epoch": 1.31, + "learning_rate": 1.3307577360534146e-05, + "loss": 0.7356, + "step": 18510 + }, + { + "epoch": 1.31, + "learning_rate": 1.3283014453863141e-05, + "loss": 0.6898, + "step": 18520 + }, + { + "epoch": 1.31, + "learning_rate": 1.3258466033433384e-05, + "loss": 0.7231, + "step": 18530 + }, + { + "epoch": 1.31, + "learning_rate": 1.323393212959518e-05, + "loss": 0.6927, + "step": 18540 + }, + { + "epoch": 1.31, + "learning_rate": 1.320941277268093e-05, + "loss": 0.7004, + "step": 18550 + }, + { + "epoch": 1.31, + "learning_rate": 1.3184907993005007e-05, + "loss": 0.6777, + "step": 18560 + }, + { + "epoch": 1.31, + "learning_rate": 1.3160417820863807e-05, + "loss": 0.6808, + "step": 18570 + }, + { + "epoch": 1.32, + "learning_rate": 1.3135942286535619e-05, + "loss": 0.7087, + "step": 18580 + }, + { + "epoch": 1.32, + "learning_rate": 1.3111481420280675e-05, + "loss": 0.7246, + "step": 18590 + }, + { + "epoch": 1.32, + "learning_rate": 1.3087035252341035e-05, + "loss": 0.6971, + "step": 18600 + }, + { + "epoch": 1.32, + "learning_rate": 1.3062603812940616e-05, + "loss": 0.7056, + "step": 18610 + }, + { + "epoch": 1.32, + "learning_rate": 1.303818713228513e-05, + "loss": 0.7253, + "step": 18620 + }, + { + "epoch": 1.32, + "learning_rate": 1.3013785240562015e-05, + "loss": 0.6891, + "step": 18630 + }, + { + "epoch": 1.32, + "learning_rate": 1.298939816794043e-05, + "loss": 0.7273, + "step": 18640 + }, + { + "epoch": 1.32, + "learning_rate": 1.2965025944571228e-05, + "loss": 0.7345, + "step": 18650 + }, + { + "epoch": 1.32, + "learning_rate": 1.2940668600586902e-05, + "loss": 0.7106, + "step": 18660 + }, + { + "epoch": 1.32, + "learning_rate": 1.291632616610154e-05, + "loss": 0.6933, + "step": 18670 + }, + { + "epoch": 1.32, + "learning_rate": 1.2891998671210787e-05, + "loss": 0.6973, + "step": 18680 + }, + { + "epoch": 1.32, + "learning_rate": 1.2867686145991831e-05, + "loss": 0.7173, + "step": 18690 + }, + { + "epoch": 1.32, + "learning_rate": 1.2843388620503371e-05, + "loss": 0.7237, + "step": 18700 + }, + { + "epoch": 1.32, + "learning_rate": 1.2819106124785518e-05, + "loss": 0.705, + "step": 18710 + }, + { + "epoch": 1.33, + "learning_rate": 1.2794838688859845e-05, + "loss": 0.7301, + "step": 18720 + }, + { + "epoch": 1.33, + "learning_rate": 1.277058634272926e-05, + "loss": 0.7166, + "step": 18730 + }, + { + "epoch": 1.33, + "learning_rate": 1.2746349116378064e-05, + "loss": 0.7011, + "step": 18740 + }, + { + "epoch": 1.33, + "learning_rate": 1.2722127039771819e-05, + "loss": 0.7219, + "step": 18750 + }, + { + "epoch": 1.33, + "learning_rate": 1.26979201428574e-05, + "loss": 0.7132, + "step": 18760 + }, + { + "epoch": 1.33, + "learning_rate": 1.267372845556287e-05, + "loss": 0.746, + "step": 18770 + }, + { + "epoch": 1.33, + "learning_rate": 1.2649552007797533e-05, + "loss": 0.7277, + "step": 18780 + }, + { + "epoch": 1.33, + "learning_rate": 1.2625390829451805e-05, + "loss": 0.705, + "step": 18790 + }, + { + "epoch": 1.33, + "learning_rate": 1.2601244950397273e-05, + "loss": 0.7349, + "step": 18800 + }, + { + "epoch": 1.33, + "learning_rate": 1.2577114400486561e-05, + "loss": 0.7073, + "step": 18810 + }, + { + "epoch": 1.33, + "learning_rate": 1.2552999209553385e-05, + "loss": 0.7071, + "step": 18820 + }, + { + "epoch": 1.33, + "learning_rate": 1.2528899407412426e-05, + "loss": 0.7241, + "step": 18830 + }, + { + "epoch": 1.33, + "learning_rate": 1.2504815023859387e-05, + "loss": 0.7267, + "step": 18840 + }, + { + "epoch": 1.33, + "learning_rate": 1.2480746088670866e-05, + "loss": 0.6909, + "step": 18850 + }, + { + "epoch": 1.33, + "learning_rate": 1.2456692631604392e-05, + "loss": 0.7326, + "step": 18860 + }, + { + "epoch": 1.34, + "learning_rate": 1.2432654682398348e-05, + "loss": 0.7191, + "step": 18870 + }, + { + "epoch": 1.34, + "learning_rate": 1.2408632270771941e-05, + "loss": 0.6932, + "step": 18880 + }, + { + "epoch": 1.34, + "learning_rate": 1.2384625426425156e-05, + "loss": 0.7072, + "step": 18890 + }, + { + "epoch": 1.34, + "learning_rate": 1.2360634179038751e-05, + "loss": 0.7001, + "step": 18900 + }, + { + "epoch": 1.34, + "learning_rate": 1.2336658558274211e-05, + "loss": 0.6793, + "step": 18910 + }, + { + "epoch": 1.34, + "learning_rate": 1.231269859377367e-05, + "loss": 0.7359, + "step": 18920 + }, + { + "epoch": 1.34, + "learning_rate": 1.2288754315159912e-05, + "loss": 0.707, + "step": 18930 + }, + { + "epoch": 1.34, + "learning_rate": 1.2264825752036344e-05, + "loss": 0.7213, + "step": 18940 + }, + { + "epoch": 1.34, + "learning_rate": 1.2240912933986945e-05, + "loss": 0.7316, + "step": 18950 + }, + { + "epoch": 1.34, + "learning_rate": 1.2217015890576212e-05, + "loss": 0.6816, + "step": 18960 + }, + { + "epoch": 1.34, + "learning_rate": 1.219313465134913e-05, + "loss": 0.7331, + "step": 18970 + }, + { + "epoch": 1.34, + "learning_rate": 1.2169269245831171e-05, + "loss": 0.737, + "step": 18980 + }, + { + "epoch": 1.34, + "learning_rate": 1.214541970352823e-05, + "loss": 0.706, + "step": 18990 + }, + { + "epoch": 1.34, + "learning_rate": 1.2121586053926559e-05, + "loss": 0.7013, + "step": 19000 + }, + { + "epoch": 1.35, + "learning_rate": 1.20977683264928e-05, + "loss": 0.7216, + "step": 19010 + }, + { + "epoch": 1.35, + "learning_rate": 1.2073966550673871e-05, + "loss": 0.7222, + "step": 19020 + }, + { + "epoch": 1.35, + "learning_rate": 1.2050180755897012e-05, + "loss": 0.7237, + "step": 19030 + }, + { + "epoch": 1.35, + "learning_rate": 1.2026410971569655e-05, + "loss": 0.689, + "step": 19040 + }, + { + "epoch": 1.35, + "learning_rate": 1.2002657227079486e-05, + "loss": 0.7145, + "step": 19050 + }, + { + "epoch": 1.35, + "learning_rate": 1.1978919551794318e-05, + "loss": 0.7008, + "step": 19060 + }, + { + "epoch": 1.35, + "learning_rate": 1.195519797506213e-05, + "loss": 0.7272, + "step": 19070 + }, + { + "epoch": 1.35, + "learning_rate": 1.1931492526210988e-05, + "loss": 0.7297, + "step": 19080 + }, + { + "epoch": 1.35, + "learning_rate": 1.1907803234549011e-05, + "loss": 0.6938, + "step": 19090 + }, + { + "epoch": 1.35, + "learning_rate": 1.1884130129364332e-05, + "loss": 0.7154, + "step": 19100 + }, + { + "epoch": 1.35, + "learning_rate": 1.1860473239925097e-05, + "loss": 0.7069, + "step": 19110 + }, + { + "epoch": 1.35, + "learning_rate": 1.1836832595479403e-05, + "loss": 0.685, + "step": 19120 + }, + { + "epoch": 1.35, + "learning_rate": 1.181320822525524e-05, + "loss": 0.7255, + "step": 19130 + }, + { + "epoch": 1.35, + "learning_rate": 1.178960015846048e-05, + "loss": 0.6999, + "step": 19140 + }, + { + "epoch": 1.36, + "learning_rate": 1.1766008424282863e-05, + "loss": 0.7231, + "step": 19150 + }, + { + "epoch": 1.36, + "learning_rate": 1.1742433051889926e-05, + "loss": 0.7174, + "step": 19160 + }, + { + "epoch": 1.36, + "learning_rate": 1.1718874070428961e-05, + "loss": 0.7056, + "step": 19170 + }, + { + "epoch": 1.36, + "learning_rate": 1.1695331509027002e-05, + "loss": 0.7058, + "step": 19180 + }, + { + "epoch": 1.36, + "learning_rate": 1.1671805396790791e-05, + "loss": 0.7217, + "step": 19190 + }, + { + "epoch": 1.36, + "learning_rate": 1.1648295762806743e-05, + "loss": 0.6955, + "step": 19200 + }, + { + "epoch": 1.36, + "learning_rate": 1.1624802636140874e-05, + "loss": 0.7148, + "step": 19210 + }, + { + "epoch": 1.36, + "learning_rate": 1.1601326045838792e-05, + "loss": 0.7097, + "step": 19220 + }, + { + "epoch": 1.36, + "learning_rate": 1.1577866020925685e-05, + "loss": 0.7287, + "step": 19230 + }, + { + "epoch": 1.36, + "learning_rate": 1.1554422590406255e-05, + "loss": 0.7097, + "step": 19240 + }, + { + "epoch": 1.36, + "learning_rate": 1.1530995783264666e-05, + "loss": 0.693, + "step": 19250 + }, + { + "epoch": 1.36, + "learning_rate": 1.1507585628464542e-05, + "loss": 0.7145, + "step": 19260 + }, + { + "epoch": 1.36, + "learning_rate": 1.1484192154948925e-05, + "loss": 0.7282, + "step": 19270 + }, + { + "epoch": 1.36, + "learning_rate": 1.1460815391640237e-05, + "loss": 0.7072, + "step": 19280 + }, + { + "epoch": 1.37, + "learning_rate": 1.1437455367440211e-05, + "loss": 0.7087, + "step": 19290 + }, + { + "epoch": 1.37, + "learning_rate": 1.1414112111229933e-05, + "loss": 0.7145, + "step": 19300 + }, + { + "epoch": 1.37, + "learning_rate": 1.1390785651869704e-05, + "loss": 0.692, + "step": 19310 + }, + { + "epoch": 1.37, + "learning_rate": 1.1367476018199094e-05, + "loss": 0.7257, + "step": 19320 + }, + { + "epoch": 1.37, + "learning_rate": 1.1344183239036876e-05, + "loss": 0.7178, + "step": 19330 + }, + { + "epoch": 1.37, + "learning_rate": 1.1320907343180958e-05, + "loss": 0.6941, + "step": 19340 + }, + { + "epoch": 1.37, + "learning_rate": 1.129764835940838e-05, + "loss": 0.7482, + "step": 19350 + }, + { + "epoch": 1.37, + "learning_rate": 1.1274406316475287e-05, + "loss": 0.7291, + "step": 19360 + }, + { + "epoch": 1.37, + "learning_rate": 1.1251181243116878e-05, + "loss": 0.7153, + "step": 19370 + }, + { + "epoch": 1.37, + "learning_rate": 1.1227973168047362e-05, + "loss": 0.7166, + "step": 19380 + }, + { + "epoch": 1.37, + "learning_rate": 1.1204782119959925e-05, + "loss": 0.7189, + "step": 19390 + }, + { + "epoch": 1.37, + "learning_rate": 1.118160812752672e-05, + "loss": 0.7164, + "step": 19400 + }, + { + "epoch": 1.37, + "learning_rate": 1.1158451219398819e-05, + "loss": 0.7299, + "step": 19410 + }, + { + "epoch": 1.37, + "learning_rate": 1.1135311424206147e-05, + "loss": 0.7305, + "step": 19420 + }, + { + "epoch": 1.38, + "learning_rate": 1.1112188770557474e-05, + "loss": 0.7395, + "step": 19430 + }, + { + "epoch": 1.38, + "learning_rate": 1.1089083287040398e-05, + "loss": 0.6953, + "step": 19440 + }, + { + "epoch": 1.38, + "learning_rate": 1.1065995002221283e-05, + "loss": 0.6945, + "step": 19450 + }, + { + "epoch": 1.38, + "learning_rate": 1.1042923944645217e-05, + "loss": 0.6879, + "step": 19460 + }, + { + "epoch": 1.38, + "learning_rate": 1.101987014283599e-05, + "loss": 0.7195, + "step": 19470 + }, + { + "epoch": 1.38, + "learning_rate": 1.0996833625296066e-05, + "loss": 0.7221, + "step": 19480 + }, + { + "epoch": 1.38, + "learning_rate": 1.097381442050655e-05, + "loss": 0.67, + "step": 19490 + }, + { + "epoch": 1.38, + "learning_rate": 1.0950812556927125e-05, + "loss": 0.7281, + "step": 19500 + }, + { + "epoch": 1.38, + "learning_rate": 1.0927828062996026e-05, + "loss": 0.7209, + "step": 19510 + }, + { + "epoch": 1.38, + "learning_rate": 1.0904860967130034e-05, + "loss": 0.7153, + "step": 19520 + }, + { + "epoch": 1.38, + "learning_rate": 1.0881911297724415e-05, + "loss": 0.7008, + "step": 19530 + }, + { + "epoch": 1.38, + "learning_rate": 1.0858979083152906e-05, + "loss": 0.6992, + "step": 19540 + }, + { + "epoch": 1.38, + "learning_rate": 1.0836064351767609e-05, + "loss": 0.6969, + "step": 19550 + }, + { + "epoch": 1.38, + "learning_rate": 1.0813167131899062e-05, + "loss": 0.7363, + "step": 19560 + }, + { + "epoch": 1.39, + "learning_rate": 1.079028745185614e-05, + "loss": 0.7194, + "step": 19570 + }, + { + "epoch": 1.39, + "learning_rate": 1.0767425339926038e-05, + "loss": 0.6893, + "step": 19580 + }, + { + "epoch": 1.39, + "learning_rate": 1.0744580824374217e-05, + "loss": 0.7197, + "step": 19590 + }, + { + "epoch": 1.39, + "learning_rate": 1.0721753933444376e-05, + "loss": 0.7105, + "step": 19600 + }, + { + "epoch": 1.39, + "learning_rate": 1.0698944695358448e-05, + "loss": 0.6949, + "step": 19610 + }, + { + "epoch": 1.39, + "learning_rate": 1.0676153138316536e-05, + "loss": 0.7077, + "step": 19620 + }, + { + "epoch": 1.39, + "learning_rate": 1.0653379290496872e-05, + "loss": 0.7389, + "step": 19630 + }, + { + "epoch": 1.39, + "learning_rate": 1.0630623180055788e-05, + "loss": 0.7202, + "step": 19640 + }, + { + "epoch": 1.39, + "learning_rate": 1.0607884835127701e-05, + "loss": 0.6841, + "step": 19650 + }, + { + "epoch": 1.39, + "learning_rate": 1.0585164283825075e-05, + "loss": 0.6841, + "step": 19660 + }, + { + "epoch": 1.39, + "learning_rate": 1.0562461554238346e-05, + "loss": 0.7387, + "step": 19670 + }, + { + "epoch": 1.39, + "learning_rate": 1.053977667443592e-05, + "loss": 0.7086, + "step": 19680 + }, + { + "epoch": 1.39, + "learning_rate": 1.0517109672464154e-05, + "loss": 0.6954, + "step": 19690 + }, + { + "epoch": 1.39, + "learning_rate": 1.0494460576347304e-05, + "loss": 0.7152, + "step": 19700 + }, + { + "epoch": 1.4, + "learning_rate": 1.0471829414087462e-05, + "loss": 0.6811, + "step": 19710 + }, + { + "epoch": 1.4, + "learning_rate": 1.0449216213664553e-05, + "loss": 0.6983, + "step": 19720 + }, + { + "epoch": 1.4, + "learning_rate": 1.0426621003036315e-05, + "loss": 0.7382, + "step": 19730 + }, + { + "epoch": 1.4, + "learning_rate": 1.0404043810138242e-05, + "loss": 0.7026, + "step": 19740 + }, + { + "epoch": 1.4, + "learning_rate": 1.0381484662883531e-05, + "loss": 0.7031, + "step": 19750 + }, + { + "epoch": 1.4, + "learning_rate": 1.0358943589163073e-05, + "loss": 0.6844, + "step": 19760 + }, + { + "epoch": 1.4, + "learning_rate": 1.0336420616845426e-05, + "loss": 0.706, + "step": 19770 + }, + { + "epoch": 1.4, + "learning_rate": 1.0313915773776772e-05, + "loss": 0.7197, + "step": 19780 + }, + { + "epoch": 1.4, + "learning_rate": 1.029142908778088e-05, + "loss": 0.6994, + "step": 19790 + }, + { + "epoch": 1.4, + "learning_rate": 1.0268960586659027e-05, + "loss": 0.7121, + "step": 19800 + }, + { + "epoch": 1.4, + "learning_rate": 1.0246510298190063e-05, + "loss": 0.719, + "step": 19810 + }, + { + "epoch": 1.4, + "learning_rate": 1.0224078250130292e-05, + "loss": 0.7186, + "step": 19820 + }, + { + "epoch": 1.4, + "learning_rate": 1.020166447021349e-05, + "loss": 0.7238, + "step": 19830 + }, + { + "epoch": 1.4, + "learning_rate": 1.0179268986150816e-05, + "loss": 0.7045, + "step": 19840 + }, + { + "epoch": 1.41, + "learning_rate": 1.0156891825630818e-05, + "loss": 0.6938, + "step": 19850 + }, + { + "epoch": 1.41, + "learning_rate": 1.0134533016319402e-05, + "loss": 0.6845, + "step": 19860 + }, + { + "epoch": 1.41, + "learning_rate": 1.0112192585859792e-05, + "loss": 0.7167, + "step": 19870 + }, + { + "epoch": 1.41, + "learning_rate": 1.0089870561872464e-05, + "loss": 0.7119, + "step": 19880 + }, + { + "epoch": 1.41, + "learning_rate": 1.0067566971955142e-05, + "loss": 0.7115, + "step": 19890 + }, + { + "epoch": 1.41, + "learning_rate": 1.0045281843682778e-05, + "loss": 0.7203, + "step": 19900 + }, + { + "epoch": 1.41, + "learning_rate": 1.0023015204607491e-05, + "loss": 0.7004, + "step": 19910 + }, + { + "epoch": 1.41, + "learning_rate": 1.0000767082258536e-05, + "loss": 0.7156, + "step": 19920 + }, + { + "epoch": 1.41, + "learning_rate": 9.978537504142266e-06, + "loss": 0.6905, + "step": 19930 + }, + { + "epoch": 1.41, + "learning_rate": 9.956326497742121e-06, + "loss": 0.6819, + "step": 19940 + }, + { + "epoch": 1.41, + "learning_rate": 9.934134090518593e-06, + "loss": 0.6979, + "step": 19950 + }, + { + "epoch": 1.41, + "learning_rate": 9.911960309909152e-06, + "loss": 0.6983, + "step": 19960 + }, + { + "epoch": 1.41, + "learning_rate": 9.889805183328238e-06, + "loss": 0.7176, + "step": 19970 + }, + { + "epoch": 1.41, + "learning_rate": 9.86766873816725e-06, + "loss": 0.6989, + "step": 19980 + }, + { + "epoch": 1.41, + "learning_rate": 9.84555100179449e-06, + "loss": 0.7201, + "step": 19990 + }, + { + "epoch": 1.42, + "learning_rate": 9.823452001555109e-06, + "loss": 0.7361, + "step": 20000 + }, + { + "epoch": 1.42, + "learning_rate": 9.8013717647711e-06, + "loss": 0.7238, + "step": 20010 + }, + { + "epoch": 1.42, + "learning_rate": 9.779310318741267e-06, + "loss": 0.7321, + "step": 20020 + }, + { + "epoch": 1.42, + "learning_rate": 9.75726769074118e-06, + "loss": 0.7064, + "step": 20030 + }, + { + "epoch": 1.42, + "learning_rate": 9.735243908023154e-06, + "loss": 0.6871, + "step": 20040 + }, + { + "epoch": 1.42, + "learning_rate": 9.71323899781616e-06, + "loss": 0.7289, + "step": 20050 + }, + { + "epoch": 1.42, + "learning_rate": 9.691252987325886e-06, + "loss": 0.6958, + "step": 20060 + }, + { + "epoch": 1.42, + "learning_rate": 9.669285903734632e-06, + "loss": 0.7123, + "step": 20070 + }, + { + "epoch": 1.42, + "learning_rate": 9.647337774201312e-06, + "loss": 0.7123, + "step": 20080 + }, + { + "epoch": 1.42, + "learning_rate": 9.625408625861387e-06, + "loss": 0.7064, + "step": 20090 + }, + { + "epoch": 1.42, + "learning_rate": 9.603498485826848e-06, + "loss": 0.7086, + "step": 20100 + }, + { + "epoch": 1.42, + "learning_rate": 9.581607381186203e-06, + "loss": 0.7247, + "step": 20110 + }, + { + "epoch": 1.42, + "learning_rate": 9.559735339004434e-06, + "loss": 0.7389, + "step": 20120 + }, + { + "epoch": 1.42, + "learning_rate": 9.537882386322921e-06, + "loss": 0.7298, + "step": 20130 + }, + { + "epoch": 1.43, + "learning_rate": 9.516048550159463e-06, + "loss": 0.7032, + "step": 20140 + }, + { + "epoch": 1.43, + "learning_rate": 9.494233857508227e-06, + "loss": 0.717, + "step": 20150 + }, + { + "epoch": 1.43, + "learning_rate": 9.472438335339717e-06, + "loss": 0.7182, + "step": 20160 + }, + { + "epoch": 1.43, + "learning_rate": 9.450662010600716e-06, + "loss": 0.7044, + "step": 20170 + }, + { + "epoch": 1.43, + "learning_rate": 9.428904910214278e-06, + "loss": 0.723, + "step": 20180 + }, + { + "epoch": 1.43, + "learning_rate": 9.407167061079702e-06, + "loss": 0.6971, + "step": 20190 + }, + { + "epoch": 1.43, + "learning_rate": 9.385448490072485e-06, + "loss": 0.6989, + "step": 20200 + }, + { + "epoch": 1.43, + "learning_rate": 9.363749224044274e-06, + "loss": 0.7097, + "step": 20210 + }, + { + "epoch": 1.43, + "learning_rate": 9.342069289822852e-06, + "loss": 0.7078, + "step": 20220 + }, + { + "epoch": 1.43, + "learning_rate": 9.32040871421211e-06, + "loss": 0.7118, + "step": 20230 + }, + { + "epoch": 1.43, + "learning_rate": 9.298767523991999e-06, + "loss": 0.7372, + "step": 20240 + }, + { + "epoch": 1.43, + "learning_rate": 9.277145745918528e-06, + "loss": 0.707, + "step": 20250 + }, + { + "epoch": 1.43, + "learning_rate": 9.25554340672365e-06, + "loss": 0.7034, + "step": 20260 + }, + { + "epoch": 1.43, + "learning_rate": 9.233960533115326e-06, + "loss": 0.7151, + "step": 20270 + }, + { + "epoch": 1.44, + "learning_rate": 9.212397151777449e-06, + "loss": 0.6975, + "step": 20280 + }, + { + "epoch": 1.44, + "learning_rate": 9.190853289369825e-06, + "loss": 0.6909, + "step": 20290 + }, + { + "epoch": 1.44, + "learning_rate": 9.169328972528072e-06, + "loss": 0.7325, + "step": 20300 + }, + { + "epoch": 1.44, + "learning_rate": 9.147824227863697e-06, + "loss": 0.6977, + "step": 20310 + }, + { + "epoch": 1.44, + "learning_rate": 9.126339081963995e-06, + "loss": 0.7079, + "step": 20320 + }, + { + "epoch": 1.44, + "learning_rate": 9.104873561392032e-06, + "loss": 0.6974, + "step": 20330 + }, + { + "epoch": 1.44, + "learning_rate": 9.0834276926866e-06, + "loss": 0.7094, + "step": 20340 + }, + { + "epoch": 1.44, + "learning_rate": 9.062001502362192e-06, + "loss": 0.7133, + "step": 20350 + }, + { + "epoch": 1.44, + "learning_rate": 9.040595016908988e-06, + "loss": 0.7142, + "step": 20360 + }, + { + "epoch": 1.44, + "learning_rate": 9.019208262792802e-06, + "loss": 0.6902, + "step": 20370 + }, + { + "epoch": 1.44, + "learning_rate": 8.997841266455048e-06, + "loss": 0.7239, + "step": 20380 + }, + { + "epoch": 1.44, + "learning_rate": 8.976494054312701e-06, + "loss": 0.7354, + "step": 20390 + }, + { + "epoch": 1.44, + "learning_rate": 8.955166652758298e-06, + "loss": 0.719, + "step": 20400 + }, + { + "epoch": 1.44, + "learning_rate": 8.933859088159884e-06, + "loss": 0.6968, + "step": 20410 + }, + { + "epoch": 1.45, + "learning_rate": 8.912571386860958e-06, + "loss": 0.7093, + "step": 20420 + }, + { + "epoch": 1.45, + "learning_rate": 8.891303575180463e-06, + "loss": 0.6914, + "step": 20430 + }, + { + "epoch": 1.45, + "learning_rate": 8.870055679412767e-06, + "loss": 0.689, + "step": 20440 + }, + { + "epoch": 1.45, + "learning_rate": 8.848827725827621e-06, + "loss": 0.7132, + "step": 20450 + }, + { + "epoch": 1.45, + "learning_rate": 8.827619740670099e-06, + "loss": 0.6924, + "step": 20460 + }, + { + "epoch": 1.45, + "learning_rate": 8.806431750160585e-06, + "loss": 0.7063, + "step": 20470 + }, + { + "epoch": 1.45, + "learning_rate": 8.785263780494763e-06, + "loss": 0.6989, + "step": 20480 + }, + { + "epoch": 1.45, + "learning_rate": 8.764115857843555e-06, + "loss": 0.6888, + "step": 20490 + }, + { + "epoch": 1.45, + "learning_rate": 8.742988008353115e-06, + "loss": 0.7094, + "step": 20500 + }, + { + "epoch": 1.45, + "learning_rate": 8.72188025814473e-06, + "loss": 0.7201, + "step": 20510 + }, + { + "epoch": 1.45, + "learning_rate": 8.700792633314886e-06, + "loss": 0.7406, + "step": 20520 + }, + { + "epoch": 1.45, + "learning_rate": 8.67972515993517e-06, + "loss": 0.6906, + "step": 20530 + }, + { + "epoch": 1.45, + "learning_rate": 8.658677864052264e-06, + "loss": 0.7051, + "step": 20540 + }, + { + "epoch": 1.45, + "learning_rate": 8.637650771687891e-06, + "loss": 0.683, + "step": 20550 + }, + { + "epoch": 1.46, + "learning_rate": 8.616643908838787e-06, + "loss": 0.6955, + "step": 20560 + }, + { + "epoch": 1.46, + "learning_rate": 8.595657301476704e-06, + "loss": 0.6916, + "step": 20570 + }, + { + "epoch": 1.46, + "learning_rate": 8.574690975548339e-06, + "loss": 0.7069, + "step": 20580 + }, + { + "epoch": 1.46, + "learning_rate": 8.55374495697531e-06, + "loss": 0.7208, + "step": 20590 + }, + { + "epoch": 1.46, + "learning_rate": 8.53281927165412e-06, + "loss": 0.7038, + "step": 20600 + }, + { + "epoch": 1.46, + "learning_rate": 8.51191394545615e-06, + "loss": 0.6982, + "step": 20610 + }, + { + "epoch": 1.46, + "learning_rate": 8.49102900422762e-06, + "loss": 0.6804, + "step": 20620 + }, + { + "epoch": 1.46, + "learning_rate": 8.470164473789516e-06, + "loss": 0.6846, + "step": 20630 + }, + { + "epoch": 1.46, + "learning_rate": 8.449320379937594e-06, + "loss": 0.729, + "step": 20640 + }, + { + "epoch": 1.46, + "learning_rate": 8.428496748442371e-06, + "loss": 0.6942, + "step": 20650 + }, + { + "epoch": 1.46, + "learning_rate": 8.40769360504905e-06, + "loss": 0.7044, + "step": 20660 + }, + { + "epoch": 1.46, + "learning_rate": 8.386910975477494e-06, + "loss": 0.7172, + "step": 20670 + }, + { + "epoch": 1.46, + "learning_rate": 8.366148885422204e-06, + "loss": 0.7018, + "step": 20680 + }, + { + "epoch": 1.46, + "learning_rate": 8.345407360552302e-06, + "loss": 0.7247, + "step": 20690 + }, + { + "epoch": 1.47, + "learning_rate": 8.324686426511486e-06, + "loss": 0.698, + "step": 20700 + }, + { + "epoch": 1.47, + "learning_rate": 8.30398610891798e-06, + "loss": 0.7123, + "step": 20710 + }, + { + "epoch": 1.47, + "learning_rate": 8.283306433364518e-06, + "loss": 0.7027, + "step": 20720 + }, + { + "epoch": 1.47, + "learning_rate": 8.26264742541833e-06, + "loss": 0.699, + "step": 20730 + }, + { + "epoch": 1.47, + "learning_rate": 8.242009110621085e-06, + "loss": 0.7091, + "step": 20740 + }, + { + "epoch": 1.47, + "learning_rate": 8.221391514488885e-06, + "loss": 0.6848, + "step": 20750 + }, + { + "epoch": 1.47, + "learning_rate": 8.200794662512168e-06, + "loss": 0.6872, + "step": 20760 + }, + { + "epoch": 1.47, + "learning_rate": 8.180218580155774e-06, + "loss": 0.6894, + "step": 20770 + }, + { + "epoch": 1.47, + "learning_rate": 8.159663292858846e-06, + "loss": 0.7007, + "step": 20780 + }, + { + "epoch": 1.47, + "learning_rate": 8.13912882603483e-06, + "loss": 0.7175, + "step": 20790 + }, + { + "epoch": 1.47, + "learning_rate": 8.118615205071411e-06, + "loss": 0.7258, + "step": 20800 + }, + { + "epoch": 1.47, + "learning_rate": 8.098122455330497e-06, + "loss": 0.7141, + "step": 20810 + }, + { + "epoch": 1.47, + "learning_rate": 8.077650602148221e-06, + "loss": 0.7014, + "step": 20820 + }, + { + "epoch": 1.47, + "learning_rate": 8.057199670834867e-06, + "loss": 0.6966, + "step": 20830 + }, + { + "epoch": 1.48, + "learning_rate": 8.036769686674844e-06, + "loss": 0.7172, + "step": 20840 + }, + { + "epoch": 1.48, + "learning_rate": 8.016360674926663e-06, + "loss": 0.7032, + "step": 20850 + }, + { + "epoch": 1.48, + "learning_rate": 7.995972660822914e-06, + "loss": 0.7441, + "step": 20860 + }, + { + "epoch": 1.48, + "learning_rate": 7.975605669570235e-06, + "loss": 0.6719, + "step": 20870 + }, + { + "epoch": 1.48, + "learning_rate": 7.95525972634926e-06, + "loss": 0.7256, + "step": 20880 + }, + { + "epoch": 1.48, + "learning_rate": 7.934934856314586e-06, + "loss": 0.7079, + "step": 20890 + }, + { + "epoch": 1.48, + "learning_rate": 7.914631084594783e-06, + "loss": 0.693, + "step": 20900 + }, + { + "epoch": 1.48, + "learning_rate": 7.89434843629234e-06, + "loss": 0.7302, + "step": 20910 + }, + { + "epoch": 1.48, + "learning_rate": 7.874086936483599e-06, + "loss": 0.6851, + "step": 20920 + }, + { + "epoch": 1.48, + "learning_rate": 7.853846610218771e-06, + "loss": 0.7151, + "step": 20930 + }, + { + "epoch": 1.48, + "learning_rate": 7.833627482521893e-06, + "loss": 0.7283, + "step": 20940 + }, + { + "epoch": 1.48, + "learning_rate": 7.813429578390801e-06, + "loss": 0.726, + "step": 20950 + }, + { + "epoch": 1.48, + "learning_rate": 7.793252922797075e-06, + "loss": 0.6808, + "step": 20960 + }, + { + "epoch": 1.48, + "learning_rate": 7.773097540686023e-06, + "loss": 0.7085, + "step": 20970 + }, + { + "epoch": 1.49, + "learning_rate": 7.752963456976661e-06, + "loss": 0.6917, + "step": 20980 + }, + { + "epoch": 1.49, + "learning_rate": 7.732850696561683e-06, + "loss": 0.7309, + "step": 20990 + }, + { + "epoch": 1.49, + "learning_rate": 7.7127592843074e-06, + "loss": 0.7005, + "step": 21000 + }, + { + "epoch": 1.49, + "learning_rate": 7.692689245053728e-06, + "loss": 0.6843, + "step": 21010 + }, + { + "epoch": 1.49, + "learning_rate": 7.672640603614179e-06, + "loss": 0.7116, + "step": 21020 + }, + { + "epoch": 1.49, + "learning_rate": 7.652613384775791e-06, + "loss": 0.7229, + "step": 21030 + }, + { + "epoch": 1.49, + "learning_rate": 7.632607613299142e-06, + "loss": 0.7032, + "step": 21040 + }, + { + "epoch": 1.49, + "learning_rate": 7.612623313918263e-06, + "loss": 0.7184, + "step": 21050 + }, + { + "epoch": 1.49, + "learning_rate": 7.592660511340641e-06, + "loss": 0.7004, + "step": 21060 + }, + { + "epoch": 1.49, + "learning_rate": 7.572719230247205e-06, + "loss": 0.7081, + "step": 21070 + }, + { + "epoch": 1.49, + "learning_rate": 7.552799495292273e-06, + "loss": 0.6928, + "step": 21080 + }, + { + "epoch": 1.49, + "learning_rate": 7.532901331103512e-06, + "loss": 0.686, + "step": 21090 + }, + { + "epoch": 1.49, + "learning_rate": 7.513024762281914e-06, + "loss": 0.7178, + "step": 21100 + }, + { + "epoch": 1.49, + "learning_rate": 7.493169813401799e-06, + "loss": 0.6919, + "step": 21110 + }, + { + "epoch": 1.49, + "learning_rate": 7.473336509010742e-06, + "loss": 0.7132, + "step": 21120 + }, + { + "epoch": 1.5, + "learning_rate": 7.453524873629553e-06, + "loss": 0.7174, + "step": 21130 + }, + { + "epoch": 1.5, + "learning_rate": 7.4337349317522485e-06, + "loss": 0.7243, + "step": 21140 + }, + { + "epoch": 1.5, + "learning_rate": 7.41396670784604e-06, + "loss": 0.7158, + "step": 21150 + }, + { + "epoch": 1.5, + "learning_rate": 7.394220226351286e-06, + "loss": 0.7116, + "step": 21160 + }, + { + "epoch": 1.5, + "learning_rate": 7.374495511681454e-06, + "loss": 0.6906, + "step": 21170 + }, + { + "epoch": 1.5, + "learning_rate": 7.354792588223094e-06, + "loss": 0.6896, + "step": 21180 + }, + { + "epoch": 1.5, + "learning_rate": 7.3351114803358354e-06, + "loss": 0.7078, + "step": 21190 + }, + { + "epoch": 1.5, + "learning_rate": 7.3154522123523305e-06, + "loss": 0.7297, + "step": 21200 + }, + { + "epoch": 1.5, + "learning_rate": 7.295814808578216e-06, + "loss": 0.6861, + "step": 21210 + }, + { + "epoch": 1.5, + "learning_rate": 7.276199293292102e-06, + "loss": 0.6985, + "step": 21220 + }, + { + "epoch": 1.5, + "learning_rate": 7.256605690745547e-06, + "loss": 0.7065, + "step": 21230 + }, + { + "epoch": 1.5, + "learning_rate": 7.237034025163017e-06, + "loss": 0.7173, + "step": 21240 + }, + { + "epoch": 1.5, + "learning_rate": 7.217484320741838e-06, + "loss": 0.7191, + "step": 21250 + }, + { + "epoch": 1.5, + "learning_rate": 7.197956601652212e-06, + "loss": 0.7349, + "step": 21260 + }, + { + "epoch": 1.51, + "learning_rate": 7.178450892037128e-06, + "loss": 0.6995, + "step": 21270 + }, + { + "epoch": 1.51, + "learning_rate": 7.158967216012396e-06, + "loss": 0.7089, + "step": 21280 + }, + { + "epoch": 1.51, + "learning_rate": 7.139505597666557e-06, + "loss": 0.6755, + "step": 21290 + }, + { + "epoch": 1.51, + "learning_rate": 7.120066061060906e-06, + "loss": 0.6743, + "step": 21300 + }, + { + "epoch": 1.51, + "learning_rate": 7.100648630229412e-06, + "loss": 0.7079, + "step": 21310 + }, + { + "epoch": 1.51, + "learning_rate": 7.081253329178727e-06, + "loss": 0.7348, + "step": 21320 + }, + { + "epoch": 1.51, + "learning_rate": 7.061880181888158e-06, + "loss": 0.7047, + "step": 21330 + }, + { + "epoch": 1.51, + "learning_rate": 7.042529212309599e-06, + "loss": 0.7129, + "step": 21340 + }, + { + "epoch": 1.51, + "learning_rate": 7.023200444367517e-06, + "loss": 0.6997, + "step": 21350 + }, + { + "epoch": 1.51, + "learning_rate": 7.0038939019589605e-06, + "loss": 0.731, + "step": 21360 + }, + { + "epoch": 1.51, + "learning_rate": 6.984609608953488e-06, + "loss": 0.7097, + "step": 21370 + }, + { + "epoch": 1.51, + "learning_rate": 6.965347589193141e-06, + "loss": 0.7155, + "step": 21380 + }, + { + "epoch": 1.51, + "learning_rate": 6.9461078664924216e-06, + "loss": 0.7037, + "step": 21390 + }, + { + "epoch": 1.51, + "learning_rate": 6.926890464638277e-06, + "loss": 0.7201, + "step": 21400 + }, + { + "epoch": 1.52, + "learning_rate": 6.907695407390066e-06, + "loss": 0.7316, + "step": 21410 + }, + { + "epoch": 1.52, + "learning_rate": 6.888522718479498e-06, + "loss": 0.7124, + "step": 21420 + }, + { + "epoch": 1.52, + "learning_rate": 6.869372421610632e-06, + "loss": 0.7253, + "step": 21430 + }, + { + "epoch": 1.52, + "learning_rate": 6.85024454045986e-06, + "loss": 0.7065, + "step": 21440 + }, + { + "epoch": 1.52, + "learning_rate": 6.831139098675854e-06, + "loss": 0.7073, + "step": 21450 + }, + { + "epoch": 1.52, + "learning_rate": 6.812056119879534e-06, + "loss": 0.686, + "step": 21460 + }, + { + "epoch": 1.52, + "learning_rate": 6.792995627664042e-06, + "loss": 0.6915, + "step": 21470 + }, + { + "epoch": 1.52, + "learning_rate": 6.773957645594742e-06, + "loss": 0.7059, + "step": 21480 + }, + { + "epoch": 1.52, + "learning_rate": 6.754942197209163e-06, + "loss": 0.7029, + "step": 21490 + }, + { + "epoch": 1.52, + "learning_rate": 6.7359493060169475e-06, + "loss": 0.7351, + "step": 21500 + }, + { + "epoch": 1.52, + "learning_rate": 6.716978995499887e-06, + "loss": 0.7193, + "step": 21510 + }, + { + "epoch": 1.52, + "learning_rate": 6.698031289111825e-06, + "loss": 0.6966, + "step": 21520 + }, + { + "epoch": 1.52, + "learning_rate": 6.679106210278682e-06, + "loss": 0.7117, + "step": 21530 + }, + { + "epoch": 1.52, + "learning_rate": 6.660203782398383e-06, + "loss": 0.7054, + "step": 21540 + }, + { + "epoch": 1.53, + "learning_rate": 6.641324028840865e-06, + "loss": 0.712, + "step": 21550 + }, + { + "epoch": 1.53, + "learning_rate": 6.622466972948016e-06, + "loss": 0.7014, + "step": 21560 + }, + { + "epoch": 1.53, + "learning_rate": 6.603632638033683e-06, + "loss": 0.7101, + "step": 21570 + }, + { + "epoch": 1.53, + "learning_rate": 6.584821047383594e-06, + "loss": 0.7027, + "step": 21580 + }, + { + "epoch": 1.53, + "learning_rate": 6.566032224255389e-06, + "loss": 0.7388, + "step": 21590 + }, + { + "epoch": 1.53, + "learning_rate": 6.547266191878529e-06, + "loss": 0.6844, + "step": 21600 + }, + { + "epoch": 1.53, + "learning_rate": 6.528522973454315e-06, + "loss": 0.6999, + "step": 21610 + }, + { + "epoch": 1.53, + "learning_rate": 6.509802592155851e-06, + "loss": 0.7233, + "step": 21620 + }, + { + "epoch": 1.53, + "learning_rate": 6.491105071127984e-06, + "loss": 0.6955, + "step": 21630 + }, + { + "epoch": 1.53, + "learning_rate": 6.4724304334873e-06, + "loss": 0.7329, + "step": 21640 + }, + { + "epoch": 1.53, + "learning_rate": 6.453778702322114e-06, + "loss": 0.7384, + "step": 21650 + }, + { + "epoch": 1.53, + "learning_rate": 6.435149900692411e-06, + "loss": 0.6645, + "step": 21660 + }, + { + "epoch": 1.53, + "learning_rate": 6.416544051629819e-06, + "loss": 0.7142, + "step": 21670 + }, + { + "epoch": 1.53, + "learning_rate": 6.397961178137584e-06, + "loss": 0.7009, + "step": 21680 + }, + { + "epoch": 1.54, + "learning_rate": 6.3794013031905685e-06, + "loss": 0.6876, + "step": 21690 + }, + { + "epoch": 1.54, + "learning_rate": 6.36086444973519e-06, + "loss": 0.7037, + "step": 21700 + }, + { + "epoch": 1.54, + "learning_rate": 6.342350640689393e-06, + "loss": 0.7337, + "step": 21710 + }, + { + "epoch": 1.54, + "learning_rate": 6.323859898942649e-06, + "loss": 0.7101, + "step": 21720 + }, + { + "epoch": 1.54, + "learning_rate": 6.305392247355893e-06, + "loss": 0.7238, + "step": 21730 + }, + { + "epoch": 1.54, + "learning_rate": 6.2869477087615315e-06, + "loss": 0.7183, + "step": 21740 + }, + { + "epoch": 1.54, + "learning_rate": 6.268526305963374e-06, + "loss": 0.6999, + "step": 21750 + }, + { + "epoch": 1.54, + "learning_rate": 6.250128061736646e-06, + "loss": 0.697, + "step": 21760 + }, + { + "epoch": 1.54, + "learning_rate": 6.231752998827925e-06, + "loss": 0.7193, + "step": 21770 + }, + { + "epoch": 1.54, + "learning_rate": 6.213401139955144e-06, + "loss": 0.7374, + "step": 21780 + }, + { + "epoch": 1.54, + "learning_rate": 6.195072507807529e-06, + "loss": 0.7121, + "step": 21790 + }, + { + "epoch": 1.54, + "learning_rate": 6.17676712504561e-06, + "loss": 0.6946, + "step": 21800 + }, + { + "epoch": 1.54, + "learning_rate": 6.1584850143011546e-06, + "loss": 0.7179, + "step": 21810 + }, + { + "epoch": 1.54, + "learning_rate": 6.140226198177176e-06, + "loss": 0.6801, + "step": 21820 + }, + { + "epoch": 1.55, + "learning_rate": 6.121990699247865e-06, + "loss": 0.7136, + "step": 21830 + }, + { + "epoch": 1.55, + "learning_rate": 6.103778540058611e-06, + "loss": 0.7195, + "step": 21840 + }, + { + "epoch": 1.55, + "learning_rate": 6.085589743125919e-06, + "loss": 0.683, + "step": 21850 + }, + { + "epoch": 1.55, + "learning_rate": 6.067424330937438e-06, + "loss": 0.7171, + "step": 21860 + }, + { + "epoch": 1.55, + "learning_rate": 6.0492823259518795e-06, + "loss": 0.7437, + "step": 21870 + }, + { + "epoch": 1.55, + "learning_rate": 6.0311637505990394e-06, + "loss": 0.6891, + "step": 21880 + }, + { + "epoch": 1.55, + "learning_rate": 6.013068627279725e-06, + "loss": 0.7259, + "step": 21890 + }, + { + "epoch": 1.55, + "learning_rate": 5.994996978365763e-06, + "loss": 0.7382, + "step": 21900 + }, + { + "epoch": 1.55, + "learning_rate": 5.97694882619996e-06, + "loss": 0.7512, + "step": 21910 + }, + { + "epoch": 1.55, + "learning_rate": 5.9589241930960635e-06, + "loss": 0.7028, + "step": 21920 + }, + { + "epoch": 1.55, + "learning_rate": 5.940923101338733e-06, + "loss": 0.7125, + "step": 21930 + }, + { + "epoch": 1.55, + "learning_rate": 5.922945573183544e-06, + "loss": 0.707, + "step": 21940 + }, + { + "epoch": 1.55, + "learning_rate": 5.90499163085694e-06, + "loss": 0.706, + "step": 21950 + }, + { + "epoch": 1.55, + "learning_rate": 5.887061296556179e-06, + "loss": 0.7613, + "step": 21960 + }, + { + "epoch": 1.56, + "learning_rate": 5.869154592449364e-06, + "loss": 0.751, + "step": 21970 + }, + { + "epoch": 1.56, + "learning_rate": 5.8512715406753486e-06, + "loss": 0.7164, + "step": 21980 + }, + { + "epoch": 1.56, + "learning_rate": 5.8334121633437794e-06, + "loss": 0.7117, + "step": 21990 + }, + { + "epoch": 1.56, + "learning_rate": 5.815576482534999e-06, + "loss": 0.7227, + "step": 22000 + }, + { + "epoch": 1.56, + "learning_rate": 5.797764520300083e-06, + "loss": 0.687, + "step": 22010 + }, + { + "epoch": 1.56, + "learning_rate": 5.7799762986607585e-06, + "loss": 0.6959, + "step": 22020 + }, + { + "epoch": 1.56, + "learning_rate": 5.762211839609424e-06, + "loss": 0.6949, + "step": 22030 + }, + { + "epoch": 1.56, + "learning_rate": 5.744471165109069e-06, + "loss": 0.7237, + "step": 22040 + }, + { + "epoch": 1.56, + "learning_rate": 5.726754297093315e-06, + "loss": 0.718, + "step": 22050 + }, + { + "epoch": 1.56, + "learning_rate": 5.709061257466314e-06, + "loss": 0.7166, + "step": 22060 + }, + { + "epoch": 1.56, + "learning_rate": 5.691392068102786e-06, + "loss": 0.6881, + "step": 22070 + }, + { + "epoch": 1.56, + "learning_rate": 5.673746750847938e-06, + "loss": 0.7015, + "step": 22080 + }, + { + "epoch": 1.56, + "learning_rate": 5.656125327517495e-06, + "loss": 0.7148, + "step": 22090 + }, + { + "epoch": 1.56, + "learning_rate": 5.638527819897607e-06, + "loss": 0.7374, + "step": 22100 + }, + { + "epoch": 1.57, + "learning_rate": 5.620954249744884e-06, + "loss": 0.6898, + "step": 22110 + }, + { + "epoch": 1.57, + "learning_rate": 5.6034046387863165e-06, + "loss": 0.7184, + "step": 22120 + }, + { + "epoch": 1.57, + "learning_rate": 5.585879008719297e-06, + "loss": 0.7096, + "step": 22130 + }, + { + "epoch": 1.57, + "learning_rate": 5.568377381211548e-06, + "loss": 0.6917, + "step": 22140 + }, + { + "epoch": 1.57, + "learning_rate": 5.550899777901136e-06, + "loss": 0.7112, + "step": 22150 + }, + { + "epoch": 1.57, + "learning_rate": 5.533446220396404e-06, + "loss": 0.7252, + "step": 22160 + }, + { + "epoch": 1.57, + "learning_rate": 5.5160167302759884e-06, + "loss": 0.664, + "step": 22170 + }, + { + "epoch": 1.57, + "learning_rate": 5.498611329088751e-06, + "loss": 0.7099, + "step": 22180 + }, + { + "epoch": 1.57, + "learning_rate": 5.481230038353782e-06, + "loss": 0.7, + "step": 22190 + }, + { + "epoch": 1.57, + "learning_rate": 5.463872879560366e-06, + "loss": 0.7235, + "step": 22200 + }, + { + "epoch": 1.57, + "learning_rate": 5.4465398741679386e-06, + "loss": 0.6844, + "step": 22210 + }, + { + "epoch": 1.57, + "learning_rate": 5.42923104360609e-06, + "loss": 0.7504, + "step": 22220 + }, + { + "epoch": 1.57, + "learning_rate": 5.411946409274501e-06, + "loss": 0.6676, + "step": 22230 + }, + { + "epoch": 1.57, + "learning_rate": 5.394685992542964e-06, + "loss": 0.7014, + "step": 22240 + }, + { + "epoch": 1.57, + "learning_rate": 5.377449814751304e-06, + "loss": 0.7109, + "step": 22250 + }, + { + "epoch": 1.58, + "learning_rate": 5.3602378972094e-06, + "loss": 0.7328, + "step": 22260 + }, + { + "epoch": 1.58, + "learning_rate": 5.343050261197116e-06, + "loss": 0.6915, + "step": 22270 + }, + { + "epoch": 1.58, + "learning_rate": 5.325886927964319e-06, + "loss": 0.6845, + "step": 22280 + }, + { + "epoch": 1.58, + "learning_rate": 5.308747918730806e-06, + "loss": 0.7038, + "step": 22290 + }, + { + "epoch": 1.58, + "learning_rate": 5.29163325468632e-06, + "loss": 0.6908, + "step": 22300 + }, + { + "epoch": 1.58, + "learning_rate": 5.274542956990491e-06, + "loss": 0.7001, + "step": 22310 + }, + { + "epoch": 1.58, + "learning_rate": 5.257477046772844e-06, + "loss": 0.7159, + "step": 22320 + }, + { + "epoch": 1.58, + "learning_rate": 5.240435545132716e-06, + "loss": 0.705, + "step": 22330 + }, + { + "epoch": 1.58, + "learning_rate": 5.22341847313931e-06, + "loss": 0.6825, + "step": 22340 + }, + { + "epoch": 1.58, + "learning_rate": 5.206425851831592e-06, + "loss": 0.7245, + "step": 22350 + }, + { + "epoch": 1.58, + "learning_rate": 5.18945770221832e-06, + "loss": 0.7323, + "step": 22360 + }, + { + "epoch": 1.58, + "learning_rate": 5.172514045277979e-06, + "loss": 0.7015, + "step": 22370 + }, + { + "epoch": 1.58, + "learning_rate": 5.155594901958791e-06, + "loss": 0.7121, + "step": 22380 + }, + { + "epoch": 1.58, + "learning_rate": 5.13870029317865e-06, + "loss": 0.7172, + "step": 22390 + }, + { + "epoch": 1.59, + "learning_rate": 5.12183023982514e-06, + "loss": 0.7217, + "step": 22400 + }, + { + "epoch": 1.59, + "learning_rate": 5.1049847627554634e-06, + "loss": 0.6898, + "step": 22410 + }, + { + "epoch": 1.59, + "learning_rate": 5.088163882796448e-06, + "loss": 0.699, + "step": 22420 + }, + { + "epoch": 1.59, + "learning_rate": 5.071367620744527e-06, + "loss": 0.7336, + "step": 22430 + }, + { + "epoch": 1.59, + "learning_rate": 5.054595997365671e-06, + "loss": 0.7309, + "step": 22440 + }, + { + "epoch": 1.59, + "learning_rate": 5.037849033395392e-06, + "loss": 0.6978, + "step": 22450 + }, + { + "epoch": 1.59, + "learning_rate": 5.0211267495387295e-06, + "loss": 0.7039, + "step": 22460 + }, + { + "epoch": 1.59, + "learning_rate": 5.004429166470209e-06, + "loss": 0.7153, + "step": 22470 + }, + { + "epoch": 1.59, + "learning_rate": 4.987756304833796e-06, + "loss": 0.6851, + "step": 22480 + }, + { + "epoch": 1.59, + "learning_rate": 4.972771883223115e-06, + "loss": 0.7255, + "step": 22490 + }, + { + "epoch": 1.59, + "learning_rate": 4.956146049072402e-06, + "loss": 0.7188, + "step": 22500 + }, + { + "epoch": 1.59, + "learning_rate": 4.939544996048415e-06, + "loss": 0.7236, + "step": 22510 + }, + { + "epoch": 1.59, + "learning_rate": 4.922968744675788e-06, + "loss": 0.7312, + "step": 22520 + }, + { + "epoch": 1.59, + "learning_rate": 4.9064173154485086e-06, + "loss": 0.7279, + "step": 22530 + }, + { + "epoch": 1.6, + "learning_rate": 4.889890728829832e-06, + "loss": 0.6995, + "step": 22540 + }, + { + "epoch": 1.6, + "learning_rate": 4.8733890052523434e-06, + "loss": 0.7013, + "step": 22550 + }, + { + "epoch": 1.6, + "learning_rate": 4.856912165117871e-06, + "loss": 0.6899, + "step": 22560 + }, + { + "epoch": 1.6, + "learning_rate": 4.840460228797489e-06, + "loss": 0.698, + "step": 22570 + }, + { + "epoch": 1.6, + "learning_rate": 4.824033216631463e-06, + "loss": 0.7089, + "step": 22580 + }, + { + "epoch": 1.6, + "learning_rate": 4.807631148929248e-06, + "loss": 0.718, + "step": 22590 + }, + { + "epoch": 1.6, + "learning_rate": 4.791254045969476e-06, + "loss": 0.7047, + "step": 22600 + }, + { + "epoch": 1.6, + "learning_rate": 4.774901927999906e-06, + "loss": 0.7076, + "step": 22610 + }, + { + "epoch": 1.6, + "learning_rate": 4.758574815237396e-06, + "loss": 0.7187, + "step": 22620 + }, + { + "epoch": 1.6, + "learning_rate": 4.742272727867894e-06, + "loss": 0.7161, + "step": 22630 + }, + { + "epoch": 1.6, + "learning_rate": 4.7259956860464165e-06, + "loss": 0.7227, + "step": 22640 + }, + { + "epoch": 1.6, + "learning_rate": 4.711367778983819e-06, + "loss": 0.7202, + "step": 22650 + }, + { + "epoch": 1.6, + "learning_rate": 4.695138379119721e-06, + "loss": 0.7038, + "step": 22660 + }, + { + "epoch": 1.6, + "learning_rate": 4.678934083077979e-06, + "loss": 0.7102, + "step": 22670 + }, + { + "epoch": 1.61, + "learning_rate": 4.662754910892711e-06, + "loss": 0.6974, + "step": 22680 + }, + { + "epoch": 1.61, + "learning_rate": 4.646600882566954e-06, + "loss": 0.6962, + "step": 22690 + }, + { + "epoch": 1.61, + "learning_rate": 4.630472018072659e-06, + "loss": 0.6789, + "step": 22700 + }, + { + "epoch": 1.61, + "learning_rate": 4.614368337350686e-06, + "loss": 0.7192, + "step": 22710 + }, + { + "epoch": 1.61, + "learning_rate": 4.598289860310745e-06, + "loss": 0.6817, + "step": 22720 + }, + { + "epoch": 1.61, + "learning_rate": 4.582236606831378e-06, + "loss": 0.7246, + "step": 22730 + }, + { + "epoch": 1.61, + "learning_rate": 4.566208596759963e-06, + "loss": 0.7084, + "step": 22740 + }, + { + "epoch": 1.61, + "learning_rate": 4.550205849912648e-06, + "loss": 0.691, + "step": 22750 + }, + { + "epoch": 1.61, + "learning_rate": 4.534228386074363e-06, + "loss": 0.7319, + "step": 22760 + }, + { + "epoch": 1.61, + "learning_rate": 4.51827622499876e-06, + "loss": 0.7048, + "step": 22770 + }, + { + "epoch": 1.61, + "learning_rate": 4.502349386408236e-06, + "loss": 0.7237, + "step": 22780 + }, + { + "epoch": 1.61, + "learning_rate": 4.48644788999385e-06, + "loss": 0.6948, + "step": 22790 + }, + { + "epoch": 1.61, + "learning_rate": 4.470571755415354e-06, + "loss": 0.7186, + "step": 22800 + }, + { + "epoch": 1.61, + "learning_rate": 4.454721002301127e-06, + "loss": 0.7407, + "step": 22810 + }, + { + "epoch": 1.62, + "learning_rate": 4.438895650248184e-06, + "loss": 0.7064, + "step": 22820 + }, + { + "epoch": 1.62, + "learning_rate": 4.423095718822112e-06, + "loss": 0.6924, + "step": 22830 + }, + { + "epoch": 1.62, + "learning_rate": 4.4073212275570954e-06, + "loss": 0.7243, + "step": 22840 + }, + { + "epoch": 1.62, + "learning_rate": 4.3915721959558534e-06, + "loss": 0.7193, + "step": 22850 + }, + { + "epoch": 1.62, + "learning_rate": 4.37584864348963e-06, + "loss": 0.7117, + "step": 22860 + }, + { + "epoch": 1.62, + "learning_rate": 4.360150589598156e-06, + "loss": 0.692, + "step": 22870 + }, + { + "epoch": 1.62, + "learning_rate": 4.344478053689652e-06, + "loss": 0.7245, + "step": 22880 + }, + { + "epoch": 1.62, + "learning_rate": 4.328831055140798e-06, + "loss": 0.7022, + "step": 22890 + }, + { + "epoch": 1.62, + "learning_rate": 4.313209613296679e-06, + "loss": 0.7265, + "step": 22900 + }, + { + "epoch": 1.62, + "learning_rate": 4.297613747470789e-06, + "loss": 0.7039, + "step": 22910 + }, + { + "epoch": 1.62, + "learning_rate": 4.282043476945008e-06, + "loss": 0.6811, + "step": 22920 + }, + { + "epoch": 1.62, + "learning_rate": 4.2664988209695775e-06, + "loss": 0.6649, + "step": 22930 + }, + { + "epoch": 1.62, + "learning_rate": 4.250979798763052e-06, + "loss": 0.6998, + "step": 22940 + }, + { + "epoch": 1.62, + "learning_rate": 4.2354864295123e-06, + "loss": 0.7433, + "step": 22950 + }, + { + "epoch": 1.63, + "learning_rate": 4.220018732372485e-06, + "loss": 0.7184, + "step": 22960 + }, + { + "epoch": 1.63, + "learning_rate": 4.204576726467027e-06, + "loss": 0.7101, + "step": 22970 + }, + { + "epoch": 1.63, + "learning_rate": 4.1891604308875706e-06, + "loss": 0.7722, + "step": 22980 + }, + { + "epoch": 1.63, + "learning_rate": 4.17376986469398e-06, + "loss": 0.7269, + "step": 22990 + }, + { + "epoch": 1.63, + "learning_rate": 4.158405046914315e-06, + "loss": 0.6903, + "step": 23000 + }, + { + "epoch": 1.63, + "learning_rate": 4.143065996544804e-06, + "loss": 0.7359, + "step": 23010 + }, + { + "epoch": 1.63, + "learning_rate": 4.1277527325498e-06, + "loss": 0.6894, + "step": 23020 + }, + { + "epoch": 1.63, + "learning_rate": 4.112465273861799e-06, + "loss": 0.7237, + "step": 23030 + }, + { + "epoch": 1.63, + "learning_rate": 4.097203639381364e-06, + "loss": 0.7028, + "step": 23040 + }, + { + "epoch": 1.63, + "learning_rate": 4.081967847977164e-06, + "loss": 0.7038, + "step": 23050 + }, + { + "epoch": 1.63, + "learning_rate": 4.066757918485886e-06, + "loss": 0.711, + "step": 23060 + }, + { + "epoch": 1.63, + "learning_rate": 4.0515738697122694e-06, + "loss": 0.717, + "step": 23070 + }, + { + "epoch": 1.63, + "learning_rate": 4.036415720429027e-06, + "loss": 0.7134, + "step": 23080 + }, + { + "epoch": 1.63, + "learning_rate": 4.021283489376876e-06, + "loss": 0.709, + "step": 23090 + }, + { + "epoch": 1.64, + "learning_rate": 4.006177195264488e-06, + "loss": 0.7266, + "step": 23100 + }, + { + "epoch": 1.64, + "learning_rate": 3.9910968567684506e-06, + "loss": 0.6872, + "step": 23110 + }, + { + "epoch": 1.64, + "learning_rate": 3.976042492533269e-06, + "loss": 0.7256, + "step": 23120 + }, + { + "epoch": 1.64, + "learning_rate": 3.961014121171342e-06, + "loss": 0.7437, + "step": 23130 + }, + { + "epoch": 1.64, + "learning_rate": 3.946011761262932e-06, + "loss": 0.7111, + "step": 23140 + }, + { + "epoch": 1.64, + "learning_rate": 3.931035431356134e-06, + "loss": 0.697, + "step": 23150 + }, + { + "epoch": 1.64, + "learning_rate": 3.916085149966856e-06, + "loss": 0.7258, + "step": 23160 + }, + { + "epoch": 1.64, + "learning_rate": 3.901160935578815e-06, + "loss": 0.7029, + "step": 23170 + }, + { + "epoch": 1.64, + "learning_rate": 3.8862628066435065e-06, + "loss": 0.686, + "step": 23180 + }, + { + "epoch": 1.64, + "learning_rate": 3.8713907815801534e-06, + "loss": 0.6994, + "step": 23190 + }, + { + "epoch": 1.64, + "learning_rate": 3.856544878775708e-06, + "loss": 0.7039, + "step": 23200 + }, + { + "epoch": 1.64, + "learning_rate": 3.841725116584846e-06, + "loss": 0.7096, + "step": 23210 + }, + { + "epoch": 1.64, + "learning_rate": 3.8269315133299126e-06, + "loss": 0.7029, + "step": 23220 + }, + { + "epoch": 1.64, + "learning_rate": 3.8121640873009067e-06, + "loss": 0.7133, + "step": 23230 + }, + { + "epoch": 1.64, + "learning_rate": 3.7974228567554617e-06, + "loss": 0.7054, + "step": 23240 + }, + { + "epoch": 1.65, + "learning_rate": 3.7827078399188393e-06, + "loss": 0.7077, + "step": 23250 + }, + { + "epoch": 1.65, + "learning_rate": 3.7680190549838847e-06, + "loss": 0.6985, + "step": 23260 + }, + { + "epoch": 1.65, + "learning_rate": 3.753356520111004e-06, + "loss": 0.7222, + "step": 23270 + }, + { + "epoch": 1.65, + "learning_rate": 3.738720253428152e-06, + "loss": 0.7174, + "step": 23280 + }, + { + "epoch": 1.65, + "learning_rate": 3.724110273030812e-06, + "loss": 0.6935, + "step": 23290 + }, + { + "epoch": 1.65, + "learning_rate": 3.709526596981977e-06, + "loss": 0.7094, + "step": 23300 + }, + { + "epoch": 1.65, + "learning_rate": 3.6949692433120907e-06, + "loss": 0.7029, + "step": 23310 + }, + { + "epoch": 1.65, + "learning_rate": 3.6804382300190844e-06, + "loss": 0.7145, + "step": 23320 + }, + { + "epoch": 1.65, + "learning_rate": 3.665933575068298e-06, + "loss": 0.7247, + "step": 23330 + }, + { + "epoch": 1.65, + "learning_rate": 3.6514552963925004e-06, + "loss": 0.7393, + "step": 23340 + }, + { + "epoch": 1.65, + "learning_rate": 3.637003411891854e-06, + "loss": 0.7352, + "step": 23350 + }, + { + "epoch": 1.65, + "learning_rate": 3.622577939433866e-06, + "loss": 0.6873, + "step": 23360 + }, + { + "epoch": 1.65, + "learning_rate": 3.6081788968534066e-06, + "loss": 0.7056, + "step": 23370 + }, + { + "epoch": 1.65, + "learning_rate": 3.5938063019526653e-06, + "loss": 0.7287, + "step": 23380 + }, + { + "epoch": 1.66, + "learning_rate": 3.579460172501142e-06, + "loss": 0.717, + "step": 23390 + }, + { + "epoch": 1.66, + "learning_rate": 3.5651405262356024e-06, + "loss": 0.7258, + "step": 23400 + }, + { + "epoch": 1.66, + "learning_rate": 3.5508473808600674e-06, + "loss": 0.6985, + "step": 23410 + }, + { + "epoch": 1.66, + "learning_rate": 3.5365807540458097e-06, + "loss": 0.7059, + "step": 23420 + }, + { + "epoch": 1.66, + "learning_rate": 3.522340663431314e-06, + "loss": 0.7047, + "step": 23430 + }, + { + "epoch": 1.66, + "learning_rate": 3.5081271266222434e-06, + "loss": 0.7064, + "step": 23440 + }, + { + "epoch": 1.66, + "learning_rate": 3.4939401611914337e-06, + "loss": 0.6804, + "step": 23450 + }, + { + "epoch": 1.66, + "learning_rate": 3.479779784678877e-06, + "loss": 0.7099, + "step": 23460 + }, + { + "epoch": 1.66, + "learning_rate": 3.465646014591703e-06, + "loss": 0.7182, + "step": 23470 + }, + { + "epoch": 1.66, + "learning_rate": 3.4515388684041193e-06, + "loss": 0.6964, + "step": 23480 + }, + { + "epoch": 1.66, + "learning_rate": 3.437458363557433e-06, + "loss": 0.707, + "step": 23490 + }, + { + "epoch": 1.66, + "learning_rate": 3.4234045174600103e-06, + "loss": 0.6729, + "step": 23500 + }, + { + "epoch": 1.66, + "learning_rate": 3.409377347487272e-06, + "loss": 0.6822, + "step": 23510 + }, + { + "epoch": 1.66, + "learning_rate": 3.395376870981634e-06, + "loss": 0.7339, + "step": 23520 + }, + { + "epoch": 1.67, + "learning_rate": 3.3814031052525175e-06, + "loss": 0.7011, + "step": 23530 + }, + { + "epoch": 1.67, + "learning_rate": 3.367456067576327e-06, + "loss": 0.7216, + "step": 23540 + }, + { + "epoch": 1.67, + "learning_rate": 3.353535775196423e-06, + "loss": 0.7194, + "step": 23550 + }, + { + "epoch": 1.67, + "learning_rate": 3.339642245323102e-06, + "loss": 0.7163, + "step": 23560 + }, + { + "epoch": 1.67, + "learning_rate": 3.325775495133546e-06, + "loss": 0.736, + "step": 23570 + }, + { + "epoch": 1.67, + "learning_rate": 3.31193554177186e-06, + "loss": 0.7071, + "step": 23580 + }, + { + "epoch": 1.67, + "learning_rate": 3.298122402349002e-06, + "loss": 0.6889, + "step": 23590 + }, + { + "epoch": 1.67, + "learning_rate": 3.2843360939427943e-06, + "loss": 0.6933, + "step": 23600 + }, + { + "epoch": 1.67, + "learning_rate": 3.270576633597866e-06, + "loss": 0.699, + "step": 23610 + }, + { + "epoch": 1.67, + "learning_rate": 3.2568440383256598e-06, + "loss": 0.6603, + "step": 23620 + }, + { + "epoch": 1.67, + "learning_rate": 3.243138325104411e-06, + "loss": 0.6971, + "step": 23630 + }, + { + "epoch": 1.67, + "learning_rate": 3.230826181364585e-06, + "loss": 0.7269, + "step": 23640 + }, + { + "epoch": 1.67, + "learning_rate": 3.217171590696141e-06, + "loss": 0.7277, + "step": 23650 + }, + { + "epoch": 1.67, + "learning_rate": 3.2035439311275006e-06, + "loss": 0.6943, + "step": 23660 + }, + { + "epoch": 1.68, + "learning_rate": 3.1899432195071575e-06, + "loss": 0.7204, + "step": 23670 + }, + { + "epoch": 1.68, + "learning_rate": 3.176369472650292e-06, + "loss": 0.7241, + "step": 23680 + }, + { + "epoch": 1.68, + "learning_rate": 3.1628227073387474e-06, + "loss": 0.6712, + "step": 23690 + }, + { + "epoch": 1.68, + "learning_rate": 3.1493029403209973e-06, + "loss": 0.6877, + "step": 23700 + }, + { + "epoch": 1.68, + "learning_rate": 3.1358101883121547e-06, + "loss": 0.6953, + "step": 23710 + }, + { + "epoch": 1.68, + "learning_rate": 3.122344467993907e-06, + "loss": 0.6918, + "step": 23720 + }, + { + "epoch": 1.68, + "learning_rate": 3.1089057960145498e-06, + "loss": 0.6866, + "step": 23730 + }, + { + "epoch": 1.68, + "learning_rate": 3.0954941889889096e-06, + "loss": 0.6975, + "step": 23740 + }, + { + "epoch": 1.68, + "learning_rate": 3.082109663498378e-06, + "loss": 0.7213, + "step": 23750 + }, + { + "epoch": 1.68, + "learning_rate": 3.068752236090841e-06, + "loss": 0.7225, + "step": 23760 + }, + { + "epoch": 1.68, + "learning_rate": 3.055421923280702e-06, + "loss": 0.7064, + "step": 23770 + }, + { + "epoch": 1.68, + "learning_rate": 3.0421187415488246e-06, + "loss": 0.696, + "step": 23780 + }, + { + "epoch": 1.68, + "learning_rate": 3.028842707342541e-06, + "loss": 0.7251, + "step": 23790 + }, + { + "epoch": 1.68, + "learning_rate": 3.0155938370756116e-06, + "loss": 0.7075, + "step": 23800 + }, + { + "epoch": 1.69, + "learning_rate": 3.0023721471282214e-06, + "loss": 0.7181, + "step": 23810 + }, + { + "epoch": 1.69, + "learning_rate": 2.9891776538469362e-06, + "loss": 0.6982, + "step": 23820 + }, + { + "epoch": 1.69, + "learning_rate": 2.9760103735447186e-06, + "loss": 0.6984, + "step": 23830 + }, + { + "epoch": 1.69, + "learning_rate": 2.962870322500866e-06, + "loss": 0.7363, + "step": 23840 + }, + { + "epoch": 1.69, + "learning_rate": 2.9497575169610243e-06, + "loss": 0.6743, + "step": 23850 + }, + { + "epoch": 1.69, + "learning_rate": 2.9366719731371563e-06, + "loss": 0.7141, + "step": 23860 + }, + { + "epoch": 1.69, + "learning_rate": 2.9236137072075067e-06, + "loss": 0.7228, + "step": 23870 + }, + { + "epoch": 1.69, + "learning_rate": 2.910582735316597e-06, + "loss": 0.7028, + "step": 23880 + }, + { + "epoch": 1.69, + "learning_rate": 2.8975790735752186e-06, + "loss": 0.7098, + "step": 23890 + }, + { + "epoch": 1.69, + "learning_rate": 2.8846027380603908e-06, + "loss": 0.6907, + "step": 23900 + }, + { + "epoch": 1.69, + "learning_rate": 2.8716537448153364e-06, + "loss": 0.7226, + "step": 23910 + }, + { + "epoch": 1.69, + "learning_rate": 2.8587321098494963e-06, + "loss": 0.7298, + "step": 23920 + }, + { + "epoch": 1.69, + "learning_rate": 2.8458378491384606e-06, + "loss": 0.7172, + "step": 23930 + }, + { + "epoch": 1.69, + "learning_rate": 2.832970978624e-06, + "loss": 0.7065, + "step": 23940 + }, + { + "epoch": 1.7, + "learning_rate": 2.8201315142140055e-06, + "loss": 0.6787, + "step": 23950 + }, + { + "epoch": 1.7, + "learning_rate": 2.8073194717824935e-06, + "loss": 0.6846, + "step": 23960 + }, + { + "epoch": 1.7, + "learning_rate": 2.794534867169568e-06, + "loss": 0.7014, + "step": 23970 + }, + { + "epoch": 1.7, + "learning_rate": 2.7817777161814245e-06, + "loss": 0.721, + "step": 23980 + }, + { + "epoch": 1.7, + "learning_rate": 2.769048034590299e-06, + "loss": 0.7459, + "step": 23990 + }, + { + "epoch": 1.7, + "learning_rate": 2.7563458381344858e-06, + "loss": 0.6801, + "step": 24000 + }, + { + "epoch": 1.7, + "learning_rate": 2.7436711425182726e-06, + "loss": 0.7142, + "step": 24010 + }, + { + "epoch": 1.7, + "learning_rate": 2.7310239634119744e-06, + "loss": 0.6988, + "step": 24020 + }, + { + "epoch": 1.7, + "learning_rate": 2.718404316451864e-06, + "loss": 0.7297, + "step": 24030 + }, + { + "epoch": 1.7, + "learning_rate": 2.7058122172401916e-06, + "loss": 0.672, + "step": 24040 + }, + { + "epoch": 1.7, + "learning_rate": 2.693247681345132e-06, + "loss": 0.7278, + "step": 24050 + }, + { + "epoch": 1.7, + "learning_rate": 2.680710724300803e-06, + "loss": 0.7055, + "step": 24060 + }, + { + "epoch": 1.7, + "learning_rate": 2.6682013616072005e-06, + "loss": 0.7266, + "step": 24070 + }, + { + "epoch": 1.7, + "learning_rate": 2.655719608730231e-06, + "loss": 0.7121, + "step": 24080 + }, + { + "epoch": 1.71, + "learning_rate": 2.6432654811016395e-06, + "loss": 0.6943, + "step": 24090 + }, + { + "epoch": 1.71, + "learning_rate": 2.630838994119042e-06, + "loss": 0.709, + "step": 24100 + }, + { + "epoch": 1.71, + "learning_rate": 2.618440163145855e-06, + "loss": 0.7115, + "step": 24110 + }, + { + "epoch": 1.71, + "learning_rate": 2.60606900351133e-06, + "loss": 0.7056, + "step": 24120 + }, + { + "epoch": 1.71, + "learning_rate": 2.5937255305104825e-06, + "loss": 0.7071, + "step": 24130 + }, + { + "epoch": 1.71, + "learning_rate": 2.581409759404113e-06, + "loss": 0.6953, + "step": 24140 + }, + { + "epoch": 1.71, + "learning_rate": 2.5691217054187726e-06, + "loss": 0.7178, + "step": 24150 + }, + { + "epoch": 1.71, + "learning_rate": 2.556861383746731e-06, + "loss": 0.7034, + "step": 24160 + }, + { + "epoch": 1.71, + "learning_rate": 2.54462880954599e-06, + "loss": 0.7028, + "step": 24170 + }, + { + "epoch": 1.71, + "learning_rate": 2.532423997940231e-06, + "loss": 0.6898, + "step": 24180 + }, + { + "epoch": 1.71, + "learning_rate": 2.5202469640188187e-06, + "loss": 0.7106, + "step": 24190 + }, + { + "epoch": 1.71, + "learning_rate": 2.508097722836769e-06, + "loss": 0.7149, + "step": 24200 + }, + { + "epoch": 1.71, + "learning_rate": 2.4959762894147503e-06, + "loss": 0.7059, + "step": 24210 + }, + { + "epoch": 1.71, + "learning_rate": 2.48388267873903e-06, + "loss": 0.6858, + "step": 24220 + }, + { + "epoch": 1.72, + "learning_rate": 2.4718169057614953e-06, + "loss": 0.6949, + "step": 24230 + }, + { + "epoch": 1.72, + "learning_rate": 2.4597789853996022e-06, + "loss": 0.7374, + "step": 24240 + }, + { + "epoch": 1.72, + "learning_rate": 2.4477689325363875e-06, + "loss": 0.6925, + "step": 24250 + }, + { + "epoch": 1.72, + "learning_rate": 2.4357867620204174e-06, + "loss": 0.7254, + "step": 24260 + }, + { + "epoch": 1.72, + "learning_rate": 2.4238324886657976e-06, + "loss": 0.7108, + "step": 24270 + }, + { + "epoch": 1.72, + "learning_rate": 2.411906127252134e-06, + "loss": 0.7205, + "step": 24280 + }, + { + "epoch": 1.72, + "learning_rate": 2.4000076925245364e-06, + "loss": 0.7125, + "step": 24290 + }, + { + "epoch": 1.72, + "learning_rate": 2.388137199193571e-06, + "loss": 0.7166, + "step": 24300 + }, + { + "epoch": 1.72, + "learning_rate": 2.3762946619352773e-06, + "loss": 0.7536, + "step": 24310 + }, + { + "epoch": 1.72, + "learning_rate": 2.364480095391114e-06, + "loss": 0.6741, + "step": 24320 + }, + { + "epoch": 1.72, + "learning_rate": 2.3526935141679695e-06, + "loss": 0.7399, + "step": 24330 + }, + { + "epoch": 1.72, + "learning_rate": 2.3409349328381264e-06, + "loss": 0.6885, + "step": 24340 + }, + { + "epoch": 1.72, + "learning_rate": 2.329204365939261e-06, + "loss": 0.6654, + "step": 24350 + }, + { + "epoch": 1.72, + "learning_rate": 2.3175018279743943e-06, + "loss": 0.7019, + "step": 24360 + }, + { + "epoch": 1.72, + "learning_rate": 2.305827333411914e-06, + "loss": 0.6802, + "step": 24370 + }, + { + "epoch": 1.73, + "learning_rate": 2.2941808966855217e-06, + "loss": 0.6845, + "step": 24380 + }, + { + "epoch": 1.73, + "learning_rate": 2.2825625321942433e-06, + "loss": 0.7087, + "step": 24390 + }, + { + "epoch": 1.73, + "learning_rate": 2.270972254302381e-06, + "loss": 0.6803, + "step": 24400 + }, + { + "epoch": 1.73, + "learning_rate": 2.259410077339527e-06, + "loss": 0.7212, + "step": 24410 + }, + { + "epoch": 1.73, + "learning_rate": 2.2478760156005278e-06, + "loss": 0.7025, + "step": 24420 + }, + { + "epoch": 1.73, + "learning_rate": 2.2363700833454627e-06, + "loss": 0.6774, + "step": 24430 + }, + { + "epoch": 1.73, + "learning_rate": 2.2248922947996464e-06, + "loss": 0.7134, + "step": 24440 + }, + { + "epoch": 1.73, + "learning_rate": 2.2134426641535756e-06, + "loss": 0.71, + "step": 24450 + }, + { + "epoch": 1.73, + "learning_rate": 2.202021205562965e-06, + "loss": 0.7253, + "step": 24460 + }, + { + "epoch": 1.73, + "learning_rate": 2.19062793314867e-06, + "loss": 0.7008, + "step": 24470 + }, + { + "epoch": 1.73, + "learning_rate": 2.179262860996717e-06, + "loss": 0.7299, + "step": 24480 + }, + { + "epoch": 1.73, + "learning_rate": 2.1679260031582575e-06, + "loss": 0.7259, + "step": 24490 + }, + { + "epoch": 1.73, + "learning_rate": 2.1566173736495663e-06, + "loss": 0.6953, + "step": 24500 + }, + { + "epoch": 1.73, + "learning_rate": 2.1453369864520125e-06, + "loss": 0.7024, + "step": 24510 + }, + { + "epoch": 1.74, + "learning_rate": 2.134084855512056e-06, + "loss": 0.7059, + "step": 24520 + }, + { + "epoch": 1.74, + "learning_rate": 2.1228609947412074e-06, + "loss": 0.6975, + "step": 24530 + }, + { + "epoch": 1.74, + "learning_rate": 2.111665418016051e-06, + "loss": 0.7137, + "step": 24540 + }, + { + "epoch": 1.74, + "learning_rate": 2.1004981391781697e-06, + "loss": 0.7219, + "step": 24550 + }, + { + "epoch": 1.74, + "learning_rate": 2.0893591720341888e-06, + "loss": 0.7267, + "step": 24560 + }, + { + "epoch": 1.74, + "learning_rate": 2.0782485303557144e-06, + "loss": 0.7114, + "step": 24570 + }, + { + "epoch": 1.74, + "learning_rate": 2.0671662278793387e-06, + "loss": 0.7075, + "step": 24580 + }, + { + "epoch": 1.74, + "learning_rate": 2.0561122783066128e-06, + "loss": 0.6794, + "step": 24590 + }, + { + "epoch": 1.74, + "learning_rate": 2.0450866953040436e-06, + "loss": 0.7195, + "step": 24600 + }, + { + "epoch": 1.74, + "learning_rate": 2.034089492503052e-06, + "loss": 0.7097, + "step": 24610 + }, + { + "epoch": 1.74, + "learning_rate": 2.0231206834999793e-06, + "loss": 0.7114, + "step": 24620 + }, + { + "epoch": 1.74, + "learning_rate": 2.0121802818560716e-06, + "loss": 0.7295, + "step": 24630 + }, + { + "epoch": 1.74, + "learning_rate": 2.0012683010974366e-06, + "loss": 0.7161, + "step": 24640 + }, + { + "epoch": 1.74, + "learning_rate": 1.9903847547150495e-06, + "loss": 0.6945, + "step": 24650 + }, + { + "epoch": 1.75, + "learning_rate": 1.979529656164736e-06, + "loss": 0.7025, + "step": 24660 + }, + { + "epoch": 1.75, + "learning_rate": 1.9687030188671517e-06, + "loss": 0.7047, + "step": 24670 + }, + { + "epoch": 1.75, + "learning_rate": 1.957904856207754e-06, + "loss": 0.6837, + "step": 24680 + }, + { + "epoch": 1.75, + "learning_rate": 1.9471351815367995e-06, + "loss": 0.7309, + "step": 24690 + }, + { + "epoch": 1.75, + "learning_rate": 1.9363940081693287e-06, + "loss": 0.7112, + "step": 24700 + }, + { + "epoch": 1.75, + "learning_rate": 1.92568134938515e-06, + "loss": 0.698, + "step": 24710 + }, + { + "epoch": 1.75, + "learning_rate": 1.914997218428796e-06, + "loss": 0.7365, + "step": 24720 + }, + { + "epoch": 1.75, + "learning_rate": 1.9043416285095568e-06, + "loss": 0.6988, + "step": 24730 + }, + { + "epoch": 1.75, + "learning_rate": 1.8937145928014073e-06, + "loss": 0.7022, + "step": 24740 + }, + { + "epoch": 1.75, + "learning_rate": 1.883116124443049e-06, + "loss": 0.7207, + "step": 24750 + }, + { + "epoch": 1.75, + "learning_rate": 1.8725462365378366e-06, + "loss": 0.7202, + "step": 24760 + }, + { + "epoch": 1.75, + "learning_rate": 1.862004942153811e-06, + "loss": 0.7263, + "step": 24770 + }, + { + "epoch": 1.75, + "learning_rate": 1.85149225432365e-06, + "loss": 0.7123, + "step": 24780 + }, + { + "epoch": 1.75, + "learning_rate": 1.8410081860446682e-06, + "loss": 0.7025, + "step": 24790 + }, + { + "epoch": 1.76, + "learning_rate": 1.8305527502787912e-06, + "loss": 0.7225, + "step": 24800 + }, + { + "epoch": 1.76, + "learning_rate": 1.8201259599525567e-06, + "loss": 0.6956, + "step": 24810 + }, + { + "epoch": 1.76, + "learning_rate": 1.8097278279570696e-06, + "loss": 0.716, + "step": 24820 + }, + { + "epoch": 1.76, + "learning_rate": 1.7993583671480213e-06, + "loss": 0.6895, + "step": 24830 + }, + { + "epoch": 1.76, + "learning_rate": 1.7890175903456403e-06, + "loss": 0.7089, + "step": 24840 + }, + { + "epoch": 1.76, + "learning_rate": 1.778705510334705e-06, + "loss": 0.7165, + "step": 24850 + }, + { + "epoch": 1.76, + "learning_rate": 1.7684221398645007e-06, + "loss": 0.6951, + "step": 24860 + }, + { + "epoch": 1.76, + "learning_rate": 1.7581674916488267e-06, + "loss": 0.6925, + "step": 24870 + }, + { + "epoch": 1.76, + "learning_rate": 1.7479415783659774e-06, + "loss": 0.724, + "step": 24880 + }, + { + "epoch": 1.76, + "learning_rate": 1.7377444126587094e-06, + "loss": 0.6585, + "step": 24890 + }, + { + "epoch": 1.76, + "learning_rate": 1.7275760071342379e-06, + "loss": 0.7301, + "step": 24900 + }, + { + "epoch": 1.76, + "learning_rate": 1.7174363743642257e-06, + "loss": 0.6777, + "step": 24910 + }, + { + "epoch": 1.76, + "learning_rate": 1.7073255268847676e-06, + "loss": 0.6788, + "step": 24920 + }, + { + "epoch": 1.76, + "learning_rate": 1.6972434771963586e-06, + "loss": 0.7013, + "step": 24930 + }, + { + "epoch": 1.77, + "learning_rate": 1.6871902377638915e-06, + "loss": 0.6847, + "step": 24940 + }, + { + "epoch": 1.77, + "learning_rate": 1.6771658210166436e-06, + "loss": 0.7232, + "step": 24950 + }, + { + "epoch": 1.77, + "learning_rate": 1.6671702393482646e-06, + "loss": 0.7024, + "step": 24960 + }, + { + "epoch": 1.77, + "learning_rate": 1.657203505116739e-06, + "loss": 0.674, + "step": 24970 + }, + { + "epoch": 1.77, + "learning_rate": 1.647265630644393e-06, + "loss": 0.7264, + "step": 24980 + }, + { + "epoch": 1.77, + "learning_rate": 1.6373566282178704e-06, + "loss": 0.703, + "step": 24990 + }, + { + "epoch": 1.77, + "learning_rate": 1.6274765100881272e-06, + "loss": 0.7121, + "step": 25000 + }, + { + "epoch": 1.77, + "learning_rate": 1.6176252884703924e-06, + "loss": 0.7126, + "step": 25010 + }, + { + "epoch": 1.77, + "learning_rate": 1.6078029755441926e-06, + "loss": 0.7127, + "step": 25020 + }, + { + "epoch": 1.77, + "learning_rate": 1.5980095834532832e-06, + "loss": 0.7084, + "step": 25030 + }, + { + "epoch": 1.77, + "learning_rate": 1.5882451243056928e-06, + "loss": 0.6787, + "step": 25040 + }, + { + "epoch": 1.77, + "learning_rate": 1.578509610173659e-06, + "loss": 0.7314, + "step": 25050 + }, + { + "epoch": 1.77, + "learning_rate": 1.5688030530936425e-06, + "loss": 0.7144, + "step": 25060 + }, + { + "epoch": 1.77, + "learning_rate": 1.559125465066294e-06, + "loss": 0.7208, + "step": 25070 + }, + { + "epoch": 1.78, + "learning_rate": 1.5494768580564618e-06, + "loss": 0.7474, + "step": 25080 + }, + { + "epoch": 1.78, + "learning_rate": 1.5398572439931591e-06, + "loss": 0.7082, + "step": 25090 + }, + { + "epoch": 1.78, + "learning_rate": 1.5302666347695477e-06, + "loss": 0.7083, + "step": 25100 + }, + { + "epoch": 1.78, + "learning_rate": 1.5207050422429286e-06, + "loss": 0.7155, + "step": 25110 + }, + { + "epoch": 1.78, + "learning_rate": 1.5111724782347375e-06, + "loss": 0.6739, + "step": 25120 + }, + { + "epoch": 1.78, + "learning_rate": 1.501668954530519e-06, + "loss": 0.7285, + "step": 25130 + }, + { + "epoch": 1.78, + "learning_rate": 1.4921944828799105e-06, + "loss": 0.6637, + "step": 25140 + }, + { + "epoch": 1.78, + "learning_rate": 1.4827490749966227e-06, + "loss": 0.6976, + "step": 25150 + }, + { + "epoch": 1.78, + "learning_rate": 1.4733327425584504e-06, + "loss": 0.7068, + "step": 25160 + }, + { + "epoch": 1.78, + "learning_rate": 1.4639454972072337e-06, + "loss": 0.6992, + "step": 25170 + }, + { + "epoch": 1.78, + "learning_rate": 1.4545873505488471e-06, + "loss": 0.694, + "step": 25180 + }, + { + "epoch": 1.78, + "learning_rate": 1.4452583141531888e-06, + "loss": 0.7144, + "step": 25190 + }, + { + "epoch": 1.78, + "learning_rate": 1.4359583995541741e-06, + "loss": 0.7099, + "step": 25200 + }, + { + "epoch": 1.78, + "learning_rate": 1.426687618249714e-06, + "loss": 0.6898, + "step": 25210 + }, + { + "epoch": 1.79, + "learning_rate": 1.4174459817016927e-06, + "loss": 0.6764, + "step": 25220 + }, + { + "epoch": 1.79, + "learning_rate": 1.4082335013359593e-06, + "loss": 0.6904, + "step": 25230 + }, + { + "epoch": 1.79, + "learning_rate": 1.3990501885423251e-06, + "loss": 0.7081, + "step": 25240 + }, + { + "epoch": 1.79, + "learning_rate": 1.3898960546745443e-06, + "loss": 0.6962, + "step": 25250 + }, + { + "epoch": 1.79, + "learning_rate": 1.3807711110502802e-06, + "loss": 0.7231, + "step": 25260 + }, + { + "epoch": 1.79, + "learning_rate": 1.3716753689511142e-06, + "loss": 0.7064, + "step": 25270 + }, + { + "epoch": 1.79, + "learning_rate": 1.3626088396225284e-06, + "loss": 0.6942, + "step": 25280 + }, + { + "epoch": 1.79, + "learning_rate": 1.353571534273887e-06, + "loss": 0.7291, + "step": 25290 + }, + { + "epoch": 1.79, + "learning_rate": 1.3445634640784133e-06, + "loss": 0.6808, + "step": 25300 + }, + { + "epoch": 1.79, + "learning_rate": 1.3355846401732015e-06, + "loss": 0.6962, + "step": 25310 + }, + { + "epoch": 1.79, + "learning_rate": 1.3266350736591744e-06, + "loss": 0.736, + "step": 25320 + }, + { + "epoch": 1.79, + "learning_rate": 1.3177147756010893e-06, + "loss": 0.6821, + "step": 25330 + }, + { + "epoch": 1.79, + "learning_rate": 1.3088237570275165e-06, + "loss": 0.6935, + "step": 25340 + }, + { + "epoch": 1.79, + "learning_rate": 1.2999620289308263e-06, + "loss": 0.7366, + "step": 25350 + }, + { + "epoch": 1.8, + "learning_rate": 1.2911296022671716e-06, + "loss": 0.7145, + "step": 25360 + }, + { + "epoch": 1.8, + "learning_rate": 1.2823264879564838e-06, + "loss": 0.6949, + "step": 25370 + }, + { + "epoch": 1.8, + "learning_rate": 1.2735526968824575e-06, + "loss": 0.7115, + "step": 25380 + }, + { + "epoch": 1.8, + "learning_rate": 1.264808239892526e-06, + "loss": 0.7214, + "step": 25390 + }, + { + "epoch": 1.8, + "learning_rate": 1.2560931277978526e-06, + "loss": 0.7128, + "step": 25400 + }, + { + "epoch": 1.8, + "learning_rate": 1.2474073713733353e-06, + "loss": 0.7109, + "step": 25410 + }, + { + "epoch": 1.8, + "learning_rate": 1.238750981357567e-06, + "loss": 0.7233, + "step": 25420 + }, + { + "epoch": 1.8, + "learning_rate": 1.2301239684528342e-06, + "loss": 0.7049, + "step": 25430 + }, + { + "epoch": 1.8, + "learning_rate": 1.2215263433250995e-06, + "loss": 0.7242, + "step": 25440 + }, + { + "epoch": 1.8, + "learning_rate": 1.2129581166040043e-06, + "loss": 0.6809, + "step": 25450 + }, + { + "epoch": 1.8, + "learning_rate": 1.2044192988828362e-06, + "loss": 0.7136, + "step": 25460 + }, + { + "epoch": 1.8, + "learning_rate": 1.1959099007185226e-06, + "loss": 0.6874, + "step": 25470 + }, + { + "epoch": 1.8, + "learning_rate": 1.1874299326316147e-06, + "loss": 0.7122, + "step": 25480 + }, + { + "epoch": 1.8, + "learning_rate": 1.1789794051062815e-06, + "loss": 0.7186, + "step": 25490 + }, + { + "epoch": 1.8, + "learning_rate": 1.1705583285903043e-06, + "loss": 0.7135, + "step": 25500 + }, + { + "epoch": 1.81, + "learning_rate": 1.162166713495033e-06, + "loss": 0.6536, + "step": 25510 + }, + { + "epoch": 1.81, + "learning_rate": 1.1538045701954047e-06, + "loss": 0.6984, + "step": 25520 + }, + { + "epoch": 1.81, + "learning_rate": 1.1454719090299131e-06, + "loss": 0.7201, + "step": 25530 + }, + { + "epoch": 1.81, + "learning_rate": 1.1371687403006147e-06, + "loss": 0.6937, + "step": 25540 + }, + { + "epoch": 1.81, + "learning_rate": 1.1288950742730898e-06, + "loss": 0.7263, + "step": 25550 + }, + { + "epoch": 1.81, + "learning_rate": 1.1206509211764415e-06, + "loss": 0.6799, + "step": 25560 + }, + { + "epoch": 1.81, + "learning_rate": 1.1124362912032949e-06, + "loss": 0.6887, + "step": 25570 + }, + { + "epoch": 1.81, + "learning_rate": 1.10425119450977e-06, + "loss": 0.7054, + "step": 25580 + }, + { + "epoch": 1.81, + "learning_rate": 1.0960956412154777e-06, + "loss": 0.7283, + "step": 25590 + }, + { + "epoch": 1.81, + "learning_rate": 1.087969641403494e-06, + "loss": 0.7374, + "step": 25600 + }, + { + "epoch": 1.81, + "learning_rate": 1.0798732051203603e-06, + "loss": 0.7013, + "step": 25610 + }, + { + "epoch": 1.81, + "learning_rate": 1.0718063423760722e-06, + "loss": 0.6999, + "step": 25620 + }, + { + "epoch": 1.81, + "learning_rate": 1.0637690631440572e-06, + "loss": 0.7123, + "step": 25630 + }, + { + "epoch": 1.81, + "learning_rate": 1.0557613773611697e-06, + "loss": 0.6978, + "step": 25640 + }, + { + "epoch": 1.82, + "learning_rate": 1.0477832949276706e-06, + "loss": 0.7393, + "step": 25650 + }, + { + "epoch": 1.82, + "learning_rate": 1.0398348257072282e-06, + "loss": 0.727, + "step": 25660 + }, + { + "epoch": 1.82, + "learning_rate": 1.0319159795268984e-06, + "loss": 0.7193, + "step": 25670 + }, + { + "epoch": 1.82, + "learning_rate": 1.024026766177108e-06, + "loss": 0.7097, + "step": 25680 + }, + { + "epoch": 1.82, + "learning_rate": 1.0161671954116464e-06, + "loss": 0.7109, + "step": 25690 + }, + { + "epoch": 1.82, + "learning_rate": 1.0083372769476629e-06, + "loss": 0.7086, + "step": 25700 + }, + { + "epoch": 1.82, + "learning_rate": 1.0005370204656418e-06, + "loss": 0.7081, + "step": 25710 + }, + { + "epoch": 1.82, + "learning_rate": 9.927664356093908e-07, + "loss": 0.6914, + "step": 25720 + }, + { + "epoch": 1.82, + "learning_rate": 9.850255319860362e-07, + "loss": 0.6929, + "step": 25730 + }, + { + "epoch": 1.82, + "learning_rate": 9.773143191660116e-07, + "loss": 0.7269, + "step": 25740 + }, + { + "epoch": 1.82, + "learning_rate": 9.696328066830378e-07, + "loss": 0.7243, + "step": 25750 + }, + { + "epoch": 1.82, + "learning_rate": 9.61981004034121e-07, + "loss": 0.718, + "step": 25760 + }, + { + "epoch": 1.82, + "learning_rate": 9.54358920679524e-07, + "loss": 0.7295, + "step": 25770 + }, + { + "epoch": 1.82, + "learning_rate": 9.46766566042781e-07, + "loss": 0.7101, + "step": 25780 + }, + { + "epoch": 1.83, + "learning_rate": 9.392039495106642e-07, + "loss": 0.7296, + "step": 25790 + }, + { + "epoch": 1.83, + "learning_rate": 9.31671080433183e-07, + "loss": 0.7022, + "step": 25800 + }, + { + "epoch": 1.83, + "learning_rate": 9.241679681235572e-07, + "loss": 0.7167, + "step": 25810 + }, + { + "epoch": 1.83, + "learning_rate": 9.166946218582301e-07, + "loss": 0.7109, + "step": 25820 + }, + { + "epoch": 1.83, + "learning_rate": 9.092510508768387e-07, + "loss": 0.7036, + "step": 25830 + }, + { + "epoch": 1.83, + "learning_rate": 9.018372643822132e-07, + "loss": 0.7064, + "step": 25840 + }, + { + "epoch": 1.83, + "learning_rate": 8.944532715403408e-07, + "loss": 0.7124, + "step": 25850 + }, + { + "epoch": 1.83, + "learning_rate": 8.87099081480397e-07, + "loss": 0.7441, + "step": 25860 + }, + { + "epoch": 1.83, + "learning_rate": 8.797747032947001e-07, + "loss": 0.7099, + "step": 25870 + }, + { + "epoch": 1.83, + "learning_rate": 8.724801460387094e-07, + "loss": 0.7087, + "step": 25880 + }, + { + "epoch": 1.83, + "learning_rate": 8.652154187310218e-07, + "loss": 0.7032, + "step": 25890 + }, + { + "epoch": 1.83, + "learning_rate": 8.579805303533417e-07, + "loss": 0.7031, + "step": 25900 + }, + { + "epoch": 1.83, + "learning_rate": 8.507754898504943e-07, + "loss": 0.6833, + "step": 25910 + }, + { + "epoch": 1.83, + "learning_rate": 8.436003061304043e-07, + "loss": 0.7052, + "step": 25920 + }, + { + "epoch": 1.84, + "learning_rate": 8.364549880640671e-07, + "loss": 0.7, + "step": 25930 + }, + { + "epoch": 1.84, + "learning_rate": 8.293395444855662e-07, + "loss": 0.7127, + "step": 25940 + }, + { + "epoch": 1.84, + "learning_rate": 8.222539841920507e-07, + "loss": 0.709, + "step": 25950 + }, + { + "epoch": 1.84, + "learning_rate": 8.151983159437215e-07, + "loss": 0.6866, + "step": 25960 + }, + { + "epoch": 1.84, + "learning_rate": 8.081725484638176e-07, + "loss": 0.7142, + "step": 25970 + }, + { + "epoch": 1.84, + "learning_rate": 8.011766904386154e-07, + "loss": 0.7077, + "step": 25980 + }, + { + "epoch": 1.84, + "learning_rate": 7.942107505174102e-07, + "loss": 0.7226, + "step": 25990 + }, + { + "epoch": 1.84, + "learning_rate": 7.872747373125156e-07, + "loss": 0.7148, + "step": 26000 + }, + { + "epoch": 1.84, + "learning_rate": 7.80368659399236e-07, + "loss": 0.7326, + "step": 26010 + }, + { + "epoch": 1.84, + "learning_rate": 7.734925253158665e-07, + "loss": 0.7066, + "step": 26020 + }, + { + "epoch": 1.84, + "learning_rate": 7.666463435636873e-07, + "loss": 0.6938, + "step": 26030 + }, + { + "epoch": 1.84, + "learning_rate": 7.598301226069443e-07, + "loss": 0.6948, + "step": 26040 + }, + { + "epoch": 1.84, + "learning_rate": 7.53043870872841e-07, + "loss": 0.6797, + "step": 26050 + }, + { + "epoch": 1.84, + "learning_rate": 7.462875967515242e-07, + "loss": 0.7114, + "step": 26060 + }, + { + "epoch": 1.85, + "learning_rate": 7.395613085960873e-07, + "loss": 0.7184, + "step": 26070 + }, + { + "epoch": 1.85, + "learning_rate": 7.328650147225419e-07, + "loss": 0.6855, + "step": 26080 + }, + { + "epoch": 1.85, + "learning_rate": 7.261987234098238e-07, + "loss": 0.7092, + "step": 26090 + }, + { + "epoch": 1.85, + "learning_rate": 7.19562442899771e-07, + "loss": 0.717, + "step": 26100 + }, + { + "epoch": 1.85, + "learning_rate": 7.129561813971147e-07, + "loss": 0.7146, + "step": 26110 + }, + { + "epoch": 1.85, + "learning_rate": 7.06379947069477e-07, + "loss": 0.7148, + "step": 26120 + }, + { + "epoch": 1.85, + "learning_rate": 6.998337480473599e-07, + "loss": 0.688, + "step": 26130 + }, + { + "epoch": 1.85, + "learning_rate": 6.933175924241225e-07, + "loss": 0.7048, + "step": 26140 + }, + { + "epoch": 1.85, + "learning_rate": 6.868314882559845e-07, + "loss": 0.7307, + "step": 26150 + }, + { + "epoch": 1.85, + "learning_rate": 6.803754435620091e-07, + "loss": 0.721, + "step": 26160 + }, + { + "epoch": 1.85, + "learning_rate": 6.739494663241031e-07, + "loss": 0.7126, + "step": 26170 + }, + { + "epoch": 1.85, + "learning_rate": 6.675535644869891e-07, + "loss": 0.695, + "step": 26180 + }, + { + "epoch": 1.85, + "learning_rate": 6.611877459582083e-07, + "loss": 0.6855, + "step": 26190 + }, + { + "epoch": 1.85, + "learning_rate": 6.548520186081181e-07, + "loss": 0.7089, + "step": 26200 + }, + { + "epoch": 1.86, + "learning_rate": 6.485463902698635e-07, + "loss": 0.7166, + "step": 26210 + }, + { + "epoch": 1.86, + "learning_rate": 6.42270868739378e-07, + "loss": 0.6708, + "step": 26220 + }, + { + "epoch": 1.86, + "learning_rate": 6.360254617753719e-07, + "loss": 0.7116, + "step": 26230 + }, + { + "epoch": 1.86, + "learning_rate": 6.29810177099327e-07, + "loss": 0.727, + "step": 26240 + }, + { + "epoch": 1.86, + "learning_rate": 6.236250223954854e-07, + "loss": 0.7041, + "step": 26250 + }, + { + "epoch": 1.86, + "learning_rate": 6.174700053108329e-07, + "loss": 0.7394, + "step": 26260 + }, + { + "epoch": 1.86, + "learning_rate": 6.113451334550934e-07, + "loss": 0.696, + "step": 26270 + }, + { + "epoch": 1.86, + "learning_rate": 6.052504144007265e-07, + "loss": 0.725, + "step": 26280 + }, + { + "epoch": 1.86, + "learning_rate": 5.991858556829072e-07, + "loss": 0.7124, + "step": 26290 + }, + { + "epoch": 1.86, + "learning_rate": 5.93151464799535e-07, + "loss": 0.7376, + "step": 26300 + }, + { + "epoch": 1.86, + "learning_rate": 5.871472492111868e-07, + "loss": 0.7172, + "step": 26310 + }, + { + "epoch": 1.86, + "learning_rate": 5.811732163411549e-07, + "loss": 0.7413, + "step": 26320 + }, + { + "epoch": 1.86, + "learning_rate": 5.75229373575406e-07, + "loss": 0.7297, + "step": 26330 + }, + { + "epoch": 1.86, + "learning_rate": 5.693157282625872e-07, + "loss": 0.7237, + "step": 26340 + }, + { + "epoch": 1.87, + "learning_rate": 5.634322877140025e-07, + "loss": 0.6955, + "step": 26350 + }, + { + "epoch": 1.87, + "learning_rate": 5.575790592036139e-07, + "loss": 0.7184, + "step": 26360 + }, + { + "epoch": 1.87, + "learning_rate": 5.517560499680358e-07, + "loss": 0.7002, + "step": 26370 + }, + { + "epoch": 1.87, + "learning_rate": 5.459632672065229e-07, + "loss": 0.6985, + "step": 26380 + }, + { + "epoch": 1.87, + "learning_rate": 5.402007180809493e-07, + "loss": 0.7, + "step": 26390 + }, + { + "epoch": 1.87, + "learning_rate": 5.344684097158215e-07, + "loss": 0.6975, + "step": 26400 + }, + { + "epoch": 1.87, + "learning_rate": 5.287663491982481e-07, + "loss": 0.6832, + "step": 26410 + }, + { + "epoch": 1.87, + "learning_rate": 5.230945435779483e-07, + "loss": 0.689, + "step": 26420 + }, + { + "epoch": 1.87, + "learning_rate": 5.174529998672322e-07, + "loss": 0.7153, + "step": 26430 + }, + { + "epoch": 1.87, + "learning_rate": 5.118417250409957e-07, + "loss": 0.7424, + "step": 26440 + }, + { + "epoch": 1.87, + "learning_rate": 5.062607260367142e-07, + "loss": 0.6872, + "step": 26450 + }, + { + "epoch": 1.87, + "learning_rate": 5.007100097544293e-07, + "loss": 0.7121, + "step": 26460 + }, + { + "epoch": 1.87, + "learning_rate": 4.95189583056746e-07, + "loss": 0.7051, + "step": 26470 + }, + { + "epoch": 1.87, + "learning_rate": 4.896994527688104e-07, + "loss": 0.7067, + "step": 26480 + }, + { + "epoch": 1.88, + "learning_rate": 4.842396256783288e-07, + "loss": 0.7005, + "step": 26490 + }, + { + "epoch": 1.88, + "learning_rate": 4.788101085355267e-07, + "loss": 0.706, + "step": 26500 + }, + { + "epoch": 1.88, + "learning_rate": 4.7341090805317346e-07, + "loss": 0.7267, + "step": 26510 + }, + { + "epoch": 1.88, + "learning_rate": 4.68042030906532e-07, + "loss": 0.7193, + "step": 26520 + }, + { + "epoch": 1.88, + "learning_rate": 4.6270348373339545e-07, + "loss": 0.6997, + "step": 26530 + }, + { + "epoch": 1.88, + "learning_rate": 4.573952731340536e-07, + "loss": 0.7155, + "step": 26540 + }, + { + "epoch": 1.88, + "learning_rate": 4.5211740567129014e-07, + "loss": 0.6904, + "step": 26550 + }, + { + "epoch": 1.88, + "learning_rate": 4.468698878703631e-07, + "loss": 0.7309, + "step": 26560 + }, + { + "epoch": 1.88, + "learning_rate": 4.416527262190218e-07, + "loss": 0.6915, + "step": 26570 + }, + { + "epoch": 1.88, + "learning_rate": 4.3646592716748423e-07, + "loss": 0.7185, + "step": 26580 + }, + { + "epoch": 1.88, + "learning_rate": 4.3130949712842093e-07, + "loss": 0.695, + "step": 26590 + }, + { + "epoch": 1.88, + "learning_rate": 4.2618344247696284e-07, + "loss": 0.7061, + "step": 26600 + }, + { + "epoch": 1.88, + "learning_rate": 4.210877695506793e-07, + "loss": 0.7159, + "step": 26610 + }, + { + "epoch": 1.88, + "learning_rate": 4.1602248464958914e-07, + "loss": 0.7226, + "step": 26620 + }, + { + "epoch": 1.88, + "learning_rate": 4.109875940361302e-07, + "loss": 0.6967, + "step": 26630 + }, + { + "epoch": 1.89, + "learning_rate": 4.059831039351675e-07, + "loss": 0.6913, + "step": 26640 + }, + { + "epoch": 1.89, + "learning_rate": 4.010090205339767e-07, + "loss": 0.6682, + "step": 26650 + }, + { + "epoch": 1.89, + "learning_rate": 3.960653499822442e-07, + "loss": 0.7328, + "step": 26660 + }, + { + "epoch": 1.89, + "learning_rate": 3.911520983920558e-07, + "loss": 0.7028, + "step": 26670 + }, + { + "epoch": 1.89, + "learning_rate": 3.862692718378885e-07, + "loss": 0.6989, + "step": 26680 + }, + { + "epoch": 1.89, + "learning_rate": 3.8141687635659953e-07, + "loss": 0.6951, + "step": 26690 + }, + { + "epoch": 1.89, + "learning_rate": 3.7659491794742595e-07, + "loss": 0.7256, + "step": 26700 + }, + { + "epoch": 1.89, + "learning_rate": 3.718034025719769e-07, + "loss": 0.6671, + "step": 26710 + }, + { + "epoch": 1.89, + "learning_rate": 3.670423361542191e-07, + "loss": 0.7007, + "step": 26720 + }, + { + "epoch": 1.89, + "learning_rate": 3.623117245804719e-07, + "loss": 0.6817, + "step": 26730 + }, + { + "epoch": 1.89, + "learning_rate": 3.576115736994096e-07, + "loss": 0.7201, + "step": 26740 + }, + { + "epoch": 1.89, + "learning_rate": 3.5294188932203653e-07, + "loss": 0.7237, + "step": 26750 + }, + { + "epoch": 1.89, + "learning_rate": 3.4830267722170684e-07, + "loss": 0.6799, + "step": 26760 + }, + { + "epoch": 1.89, + "learning_rate": 3.436939431340769e-07, + "loss": 0.7261, + "step": 26770 + }, + { + "epoch": 1.9, + "learning_rate": 3.3911569275713594e-07, + "loss": 0.703, + "step": 26780 + }, + { + "epoch": 1.9, + "learning_rate": 3.3456793175118685e-07, + "loss": 0.6719, + "step": 26790 + }, + { + "epoch": 1.9, + "learning_rate": 3.300506657388347e-07, + "loss": 0.6846, + "step": 26800 + }, + { + "epoch": 1.9, + "learning_rate": 3.25563900304976e-07, + "loss": 0.6572, + "step": 26810 + }, + { + "epoch": 1.9, + "learning_rate": 3.211076409968039e-07, + "loss": 0.722, + "step": 26820 + }, + { + "epoch": 1.9, + "learning_rate": 3.1668189332379194e-07, + "loss": 0.7193, + "step": 26830 + }, + { + "epoch": 1.9, + "learning_rate": 3.1228666275769925e-07, + "loss": 0.7138, + "step": 26840 + }, + { + "epoch": 1.9, + "learning_rate": 3.079219547325429e-07, + "loss": 0.7114, + "step": 26850 + }, + { + "epoch": 1.9, + "learning_rate": 3.03587774644612e-07, + "loss": 0.6791, + "step": 26860 + }, + { + "epoch": 1.9, + "learning_rate": 2.9928412785244777e-07, + "loss": 0.6882, + "step": 26870 + }, + { + "epoch": 1.9, + "learning_rate": 2.950110196768496e-07, + "loss": 0.7039, + "step": 26880 + }, + { + "epoch": 1.9, + "learning_rate": 2.9076845540084993e-07, + "loss": 0.6818, + "step": 26890 + }, + { + "epoch": 1.9, + "learning_rate": 2.8655644026972514e-07, + "loss": 0.7056, + "step": 26900 + }, + { + "epoch": 1.9, + "learning_rate": 2.823749794909819e-07, + "loss": 0.7029, + "step": 26910 + }, + { + "epoch": 1.91, + "learning_rate": 2.782240782343487e-07, + "loss": 0.7107, + "step": 26920 + }, + { + "epoch": 1.91, + "learning_rate": 2.741037416317732e-07, + "loss": 0.7263, + "step": 26930 + }, + { + "epoch": 1.91, + "learning_rate": 2.700139747774166e-07, + "loss": 0.6971, + "step": 26940 + }, + { + "epoch": 1.91, + "learning_rate": 2.6595478272763985e-07, + "loss": 0.7139, + "step": 26950 + }, + { + "epoch": 1.91, + "learning_rate": 2.6192617050100897e-07, + "loss": 0.6651, + "step": 26960 + }, + { + "epoch": 1.91, + "learning_rate": 2.579281430782815e-07, + "loss": 0.7181, + "step": 26970 + }, + { + "epoch": 1.91, + "learning_rate": 2.539607054023979e-07, + "loss": 0.6995, + "step": 26980 + }, + { + "epoch": 1.91, + "learning_rate": 2.50023862378479e-07, + "loss": 0.699, + "step": 26990 + }, + { + "epoch": 1.91, + "learning_rate": 2.4611761887382844e-07, + "loss": 0.718, + "step": 27000 + }, + { + "epoch": 1.91, + "learning_rate": 2.4224197971791094e-07, + "loss": 0.6785, + "step": 27010 + }, + { + "epoch": 1.91, + "learning_rate": 2.3839694970234917e-07, + "loss": 0.7222, + "step": 27020 + }, + { + "epoch": 1.91, + "learning_rate": 2.3458253358093485e-07, + "loss": 0.6785, + "step": 27030 + }, + { + "epoch": 1.91, + "learning_rate": 2.3079873606960122e-07, + "loss": 0.6973, + "step": 27040 + }, + { + "epoch": 1.91, + "learning_rate": 2.27045561846434e-07, + "loss": 0.7146, + "step": 27050 + }, + { + "epoch": 1.92, + "learning_rate": 2.2332301555164638e-07, + "loss": 0.7284, + "step": 27060 + }, + { + "epoch": 1.92, + "learning_rate": 2.1963110178759017e-07, + "loss": 0.702, + "step": 27070 + }, + { + "epoch": 1.92, + "learning_rate": 2.159698251187531e-07, + "loss": 0.7291, + "step": 27080 + }, + { + "epoch": 1.92, + "learning_rate": 2.1233919007173086e-07, + "loss": 0.7087, + "step": 27090 + }, + { + "epoch": 1.92, + "learning_rate": 2.0873920113524958e-07, + "loss": 0.7054, + "step": 27100 + }, + { + "epoch": 1.92, + "learning_rate": 2.0516986276013227e-07, + "loss": 0.6835, + "step": 27110 + }, + { + "epoch": 1.92, + "learning_rate": 2.0163117935931564e-07, + "loss": 0.7169, + "step": 27120 + }, + { + "epoch": 1.92, + "learning_rate": 1.9812315530783886e-07, + "loss": 0.6897, + "step": 27130 + }, + { + "epoch": 1.92, + "learning_rate": 1.9464579494282708e-07, + "loss": 0.7121, + "step": 27140 + }, + { + "epoch": 1.92, + "learning_rate": 1.911991025634996e-07, + "loss": 0.7111, + "step": 27150 + }, + { + "epoch": 1.92, + "learning_rate": 1.8778308243115884e-07, + "loss": 0.7009, + "step": 27160 + }, + { + "epoch": 1.92, + "learning_rate": 1.8439773876918753e-07, + "loss": 0.6859, + "step": 27170 + }, + { + "epoch": 1.92, + "learning_rate": 1.810430757630377e-07, + "loss": 0.7101, + "step": 27180 + }, + { + "epoch": 1.92, + "learning_rate": 1.7771909756023054e-07, + "loss": 0.6874, + "step": 27190 + }, + { + "epoch": 1.93, + "learning_rate": 1.7442580827035094e-07, + "loss": 0.7033, + "step": 27200 + }, + { + "epoch": 1.93, + "learning_rate": 1.7116321196504749e-07, + "loss": 0.66, + "step": 27210 + }, + { + "epoch": 1.93, + "learning_rate": 1.679313126780102e-07, + "loss": 0.7304, + "step": 27220 + }, + { + "epoch": 1.93, + "learning_rate": 1.6473011440498454e-07, + "loss": 0.6865, + "step": 27230 + }, + { + "epoch": 1.93, + "learning_rate": 1.615596211037601e-07, + "loss": 0.6943, + "step": 27240 + }, + { + "epoch": 1.93, + "learning_rate": 1.5841983669415695e-07, + "loss": 0.6896, + "step": 27250 + }, + { + "epoch": 1.93, + "learning_rate": 1.5531076505803655e-07, + "loss": 0.7158, + "step": 27260 + }, + { + "epoch": 1.93, + "learning_rate": 1.5223241003928524e-07, + "loss": 0.7259, + "step": 27270 + }, + { + "epoch": 1.93, + "learning_rate": 1.491847754438086e-07, + "loss": 0.715, + "step": 27280 + }, + { + "epoch": 1.93, + "learning_rate": 1.461678650395426e-07, + "loss": 0.6879, + "step": 27290 + }, + { + "epoch": 1.93, + "learning_rate": 1.4318168255642585e-07, + "loss": 0.6827, + "step": 27300 + }, + { + "epoch": 1.93, + "learning_rate": 1.4022623168641336e-07, + "loss": 0.7186, + "step": 27310 + }, + { + "epoch": 1.93, + "learning_rate": 1.3730151608346285e-07, + "loss": 0.692, + "step": 27320 + }, + { + "epoch": 1.93, + "learning_rate": 1.344075393635319e-07, + "loss": 0.7285, + "step": 27330 + }, + { + "epoch": 1.94, + "learning_rate": 1.3154430510457506e-07, + "loss": 0.7337, + "step": 27340 + }, + { + "epoch": 1.94, + "learning_rate": 1.2871181684654399e-07, + "loss": 0.6814, + "step": 27350 + }, + { + "epoch": 1.94, + "learning_rate": 1.2591007809136247e-07, + "loss": 0.7109, + "step": 27360 + }, + { + "epoch": 1.94, + "learning_rate": 1.2313909230295684e-07, + "loss": 0.6875, + "step": 27370 + }, + { + "epoch": 1.94, + "learning_rate": 1.2039886290722003e-07, + "loss": 0.6764, + "step": 27380 + }, + { + "epoch": 1.94, + "learning_rate": 1.1768939329201978e-07, + "loss": 0.6924, + "step": 27390 + }, + { + "epoch": 1.94, + "learning_rate": 1.1501068680719595e-07, + "loss": 0.7136, + "step": 27400 + }, + { + "epoch": 1.94, + "learning_rate": 1.1236274676456049e-07, + "loss": 0.7331, + "step": 27410 + }, + { + "epoch": 1.94, + "learning_rate": 1.1000590874080952e-07, + "loss": 0.737, + "step": 27420 + }, + { + "epoch": 1.94, + "learning_rate": 1.0741643392611355e-07, + "loss": 0.6887, + "step": 27430 + }, + { + "epoch": 1.94, + "learning_rate": 1.0485773494272321e-07, + "loss": 0.7165, + "step": 27440 + }, + { + "epoch": 1.94, + "learning_rate": 1.023298149540719e-07, + "loss": 0.6757, + "step": 27450 + }, + { + "epoch": 1.94, + "learning_rate": 9.983267708554289e-08, + "loss": 0.6937, + "step": 27460 + }, + { + "epoch": 1.94, + "learning_rate": 9.736632442446103e-08, + "loss": 0.6886, + "step": 27470 + }, + { + "epoch": 1.95, + "learning_rate": 9.493076002008994e-08, + "loss": 0.7121, + "step": 27480 + }, + { + "epoch": 1.95, + "learning_rate": 9.252598688362924e-08, + "loss": 0.7186, + "step": 27490 + }, + { + "epoch": 1.95, + "learning_rate": 9.0152007988209e-08, + "loss": 0.7159, + "step": 27500 + }, + { + "epoch": 1.95, + "learning_rate": 8.780882626888421e-08, + "loss": 0.716, + "step": 27510 + }, + { + "epoch": 1.95, + "learning_rate": 8.572629670220156e-08, + "loss": 0.7589, + "step": 27520 + }, + { + "epoch": 1.95, + "learning_rate": 8.34416375671776e-08, + "loss": 0.7078, + "step": 27530 + }, + { + "epoch": 1.95, + "learning_rate": 8.118778390458492e-08, + "loss": 0.7089, + "step": 27540 + }, + { + "epoch": 1.95, + "learning_rate": 7.89647385009723e-08, + "loss": 0.723, + "step": 27550 + }, + { + "epoch": 1.95, + "learning_rate": 7.677250410478287e-08, + "loss": 0.7296, + "step": 27560 + }, + { + "epoch": 1.95, + "learning_rate": 7.46110834263819e-08, + "loss": 0.7196, + "step": 27570 + }, + { + "epoch": 1.95, + "learning_rate": 7.248047913802902e-08, + "loss": 0.7115, + "step": 27580 + }, + { + "epoch": 1.95, + "learning_rate": 7.038069387388658e-08, + "loss": 0.7485, + "step": 27590 + }, + { + "epoch": 1.95, + "learning_rate": 6.83117302300168e-08, + "loss": 0.6943, + "step": 27600 + }, + { + "epoch": 1.95, + "learning_rate": 6.627359076437357e-08, + "loss": 0.669, + "step": 27610 + }, + { + "epoch": 1.96, + "learning_rate": 6.426627799679674e-08, + "loss": 0.7157, + "step": 27620 + }, + { + "epoch": 1.96, + "learning_rate": 6.228979440902339e-08, + "loss": 0.6938, + "step": 27630 + }, + { + "epoch": 1.96, + "learning_rate": 6.034414244466547e-08, + "loss": 0.7006, + "step": 27640 + }, + { + "epoch": 1.96, + "learning_rate": 5.842932450922656e-08, + "loss": 0.6623, + "step": 27650 + }, + { + "epoch": 1.96, + "learning_rate": 5.6545342970076855e-08, + "loss": 0.6881, + "step": 27660 + }, + { + "epoch": 1.96, + "learning_rate": 5.4692200156472583e-08, + "loss": 0.6943, + "step": 27670 + }, + { + "epoch": 1.96, + "learning_rate": 5.286989835953382e-08, + "loss": 0.7158, + "step": 27680 + }, + { + "epoch": 1.96, + "learning_rate": 5.107843983226113e-08, + "loss": 0.7247, + "step": 27690 + }, + { + "epoch": 1.96, + "learning_rate": 4.931782678951058e-08, + "loss": 0.7313, + "step": 27700 + }, + { + "epoch": 1.96, + "learning_rate": 4.7588061408013195e-08, + "loss": 0.7065, + "step": 27710 + }, + { + "epoch": 1.96, + "learning_rate": 4.588914582635273e-08, + "loss": 0.7159, + "step": 27720 + }, + { + "epoch": 1.96, + "learning_rate": 4.422108214497678e-08, + "loss": 0.7211, + "step": 27730 + }, + { + "epoch": 1.96, + "learning_rate": 4.2583872426185665e-08, + "loss": 0.7073, + "step": 27740 + }, + { + "epoch": 1.96, + "learning_rate": 4.097751869414079e-08, + "loss": 0.7067, + "step": 27750 + }, + { + "epoch": 1.96, + "learning_rate": 3.940202293484518e-08, + "loss": 0.7176, + "step": 27760 + }, + { + "epoch": 1.97, + "learning_rate": 3.785738709616016e-08, + "loss": 0.7229, + "step": 27770 + }, + { + "epoch": 1.97, + "learning_rate": 3.634361308778866e-08, + "loss": 0.6988, + "step": 27780 + }, + { + "epoch": 1.97, + "learning_rate": 3.486070278127529e-08, + "loss": 0.7275, + "step": 27790 + }, + { + "epoch": 1.97, + "learning_rate": 3.3408658010011805e-08, + "loss": 0.6845, + "step": 27800 + }, + { + "epoch": 1.97, + "learning_rate": 3.198748056922607e-08, + "loss": 0.733, + "step": 27810 + }, + { + "epoch": 1.97, + "learning_rate": 3.0597172215982014e-08, + "loss": 0.7463, + "step": 27820 + }, + { + "epoch": 1.97, + "learning_rate": 2.9237734669185223e-08, + "loss": 0.725, + "step": 27830 + }, + { + "epoch": 1.97, + "learning_rate": 2.7909169609571796e-08, + "loss": 0.7233, + "step": 27840 + }, + { + "epoch": 1.97, + "learning_rate": 2.6611478679700043e-08, + "loss": 0.7196, + "step": 27850 + }, + { + "epoch": 1.97, + "learning_rate": 2.5344663483967135e-08, + "loss": 0.7174, + "step": 27860 + }, + { + "epoch": 1.97, + "learning_rate": 2.4108725588595227e-08, + "loss": 0.6863, + "step": 27870 + }, + { + "epoch": 1.97, + "learning_rate": 2.290366652163145e-08, + "loss": 0.683, + "step": 27880 + }, + { + "epoch": 1.97, + "learning_rate": 2.1729487772945145e-08, + "loss": 0.7095, + "step": 27890 + }, + { + "epoch": 1.97, + "learning_rate": 2.0586190794222303e-08, + "loss": 0.6825, + "step": 27900 + }, + { + "epoch": 1.98, + "learning_rate": 1.947377699897668e-08, + "loss": 0.7083, + "step": 27910 + }, + { + "epoch": 1.98, + "learning_rate": 1.8392247762535898e-08, + "loss": 0.7009, + "step": 27920 + }, + { + "epoch": 1.98, + "learning_rate": 1.734160442204147e-08, + "loss": 0.6894, + "step": 27930 + }, + { + "epoch": 1.98, + "learning_rate": 1.6321848276454332e-08, + "loss": 0.731, + "step": 27940 + }, + { + "epoch": 1.98, + "learning_rate": 1.5332980586543756e-08, + "loss": 0.6969, + "step": 27950 + }, + { + "epoch": 1.98, + "learning_rate": 1.4375002574890106e-08, + "loss": 0.7128, + "step": 27960 + }, + { + "epoch": 1.98, + "learning_rate": 1.3447915425890411e-08, + "loss": 0.6867, + "step": 27970 + }, + { + "epoch": 1.98, + "learning_rate": 1.2551720285738922e-08, + "loss": 0.7137, + "step": 27980 + }, + { + "epoch": 1.98, + "learning_rate": 1.1686418262443766e-08, + "loss": 0.6866, + "step": 27990 + }, + { + "epoch": 1.98, + "learning_rate": 1.0852010425818627e-08, + "loss": 0.7063, + "step": 28000 + }, + { + "epoch": 1.98, + "learning_rate": 1.0048497807479963e-08, + "loss": 0.6913, + "step": 28010 + }, + { + "epoch": 1.98, + "learning_rate": 9.275881400844232e-09, + "loss": 0.7079, + "step": 28020 + }, + { + "epoch": 1.98, + "learning_rate": 8.534162161136228e-09, + "loss": 0.7065, + "step": 28030 + }, + { + "epoch": 1.98, + "learning_rate": 7.823341005372409e-09, + "loss": 0.6902, + "step": 28040 + }, + { + "epoch": 1.99, + "learning_rate": 7.143418812377567e-09, + "loss": 0.7062, + "step": 28050 + }, + { + "epoch": 1.99, + "learning_rate": 6.494396422768168e-09, + "loss": 0.6914, + "step": 28060 + }, + { + "epoch": 1.99, + "learning_rate": 5.876274638960677e-09, + "loss": 0.698, + "step": 28070 + }, + { + "epoch": 1.99, + "learning_rate": 5.289054225166012e-09, + "loss": 0.7074, + "step": 28080 + }, + { + "epoch": 1.99, + "learning_rate": 4.732735907392316e-09, + "loss": 0.6969, + "step": 28090 + }, + { + "epoch": 1.99, + "learning_rate": 4.207320373442181e-09, + "loss": 0.7052, + "step": 28100 + }, + { + "epoch": 1.99, + "learning_rate": 3.7128082729098734e-09, + "loss": 0.7215, + "step": 28110 + }, + { + "epoch": 1.99, + "learning_rate": 3.2492002171813362e-09, + "loss": 0.7025, + "step": 28120 + }, + { + "epoch": 1.99, + "learning_rate": 2.8164967794397366e-09, + "loss": 0.7059, + "step": 28130 + }, + { + "epoch": 1.99, + "learning_rate": 2.4146984946515907e-09, + "loss": 0.7147, + "step": 28140 + }, + { + "epoch": 1.99, + "learning_rate": 2.043805859583414e-09, + "loss": 0.7081, + "step": 28150 + }, + { + "epoch": 1.99, + "learning_rate": 1.7038193327822972e-09, + "loss": 0.7071, + "step": 28160 + }, + { + "epoch": 1.99, + "learning_rate": 1.3947393345925541e-09, + "loss": 0.7082, + "step": 28170 + }, + { + "epoch": 1.99, + "learning_rate": 1.1165662471390724e-09, + "loss": 0.6879, + "step": 28180 + }, + { + "epoch": 2.0, + "learning_rate": 8.693004143467409e-10, + "loss": 0.6987, + "step": 28190 + }, + { + "epoch": 2.0, + "learning_rate": 6.529421419154691e-10, + "loss": 0.725, + "step": 28200 + }, + { + "epoch": 2.0, + "learning_rate": 4.674916973396171e-10, + "loss": 0.7408, + "step": 28210 + }, + { + "epoch": 2.0, + "learning_rate": 3.129493099024439e-10, + "loss": 0.6983, + "step": 28220 + }, + { + "epoch": 2.0, + "learning_rate": 1.8931517067333204e-10, + "loss": 0.7114, + "step": 28230 + }, + { + "epoch": 2.0, + "learning_rate": 9.658943250223651e-11, + "loss": 0.7046, + "step": 28240 + }, + { + "epoch": 2.0, + "learning_rate": 3.4772210030786786e-11, + "loss": 0.7057, + "step": 28250 + }, + { + "epoch": 2.0, + "step": 28254, + "total_flos": 6.877628315524399e+18, + "train_loss": 0.7399377094979515, + "train_runtime": 176487.3633, + "train_samples_per_second": 2.562, + "train_steps_per_second": 0.16 + } + ], + "max_steps": 28254, + "num_train_epochs": 2, + "total_flos": 6.877628315524399e+18, + "trial_name": null, + "trial_params": null +} diff --git a/training_args.bin b/training_args.bin new file mode 100644 index 0000000000000000000000000000000000000000..4b7a4c456ed3fcd8d2f851cd7cb60b782ce18bc2 --- /dev/null +++ b/training_args.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:221face861d281c49061d94e69a5df2e8356d17457f5f4ef2f014d70fd21249c +size 3271 diff --git a/training_loss.png b/training_loss.png new file mode 100644 index 0000000000000000000000000000000000000000..ba41073f46bcba85f83f4e14e74b7634a646bf20 Binary files /dev/null and b/training_loss.png differ